Fix #113092: take highlighted part of search item into account in more places

When using menu-search, only the last part of a search item is highlighted. When sorting the search results, this should be taken into account and the highlighted words should be prioritized. This was already partially implemented in 56e98f8ba6. Now it's also taken into account in more cases of prefix search and when matching word initials. For example, `TC` now prefers `Input > Texture Coordinate` over `Texture > Checker Texture`.
2023-10-06 16:38:45 +02:00 · 2023-10-06 16:38:45 +02:00 · 0aa91a30dd
parent 046155572d
commit 0aa91a30dd
3 changed files with 107 additions and 68 deletions
--- a/source/blender/blenlib/BLI_string_search.hh
+++ b/source/blender/blenlib/BLI_string_search.hh
@ -15,7 +15,17 @@ namespace blender::string_search {
 struct SearchItem {
  void *user_data;
  Span<StringRef> normalized_words;
-  Span<float> word_weight_factors;
+  /**
+   * When using menu-search, the search item is often split into multiple groups of words, each of
+   * which corresponds to a menu entry. This id is the same for words in the same group and
+   * different otherwise.
+   */
+  Span<int> word_group_ids;
+  /**
+   * The id of the group that is highlighted in the UI. In some places, the words in this group are
+   * given higher weight.
+   */
+  int main_group_id;
  int length;
  int weight;
  /**
@ -108,6 +118,6 @@ int get_fuzzy_match_errors(StringRef query, StringRef full);
 void extract_normalized_words(StringRef str,
                              LinearAllocator<> &allocator,
                              Vector<StringRef, 64> &r_words,
-                              Vector<float, 64> &r_word_weights);
+                              Vector<int, 64> &r_word_group_ids);

 }  // namespace blender::string_search
--- a/source/blender/blenlib/intern/string_search.cc
+++ b/source/blender/blenlib/intern/string_search.cc
@ -157,27 +157,44 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)

 static constexpr int unused_word = -1;

+struct InitialsMatch {
+  Vector<int> matched_word_indices;
+
+  int count_main_group_matches(const SearchItem &item) const
+  {
+    int count = 0;
+    for (const int i : this->matched_word_indices) {
+      if (item.word_group_ids[i] == item.main_group_id) {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  bool better_than(const InitialsMatch &other, const SearchItem &item) const
+  {
+    return this->count_main_group_matches(item) > other.count_main_group_matches(item);
+  }
+};
+
 /**
 * Takes a query and tries to match it with the first characters of some words. For example, "msfv"
 * matches "Mark Sharp from Vertices". Multiple letters of the beginning of a word can be matched
 * as well. For example, "seboulo" matches "select boundary loop". The order of words is important.
 * So "bose" does not match "select boundary". However, individual words can be skipped. For
 * example, "rocc" matches "rotate edge ccw".
- *
- * \return true when the match was successful.
- * If it was successful, the used words are tagged in \a r_word_is_matched.
 */
-static bool match_word_initials(StringRef query,
-                                Span<StringRef> words,
-                                Span<int> word_match_map,
-                                MutableSpan<bool> r_word_is_matched,
-                                int start = 0)
+static std::optional<InitialsMatch> match_word_initials(StringRef query,
+                                                        const SearchItem &item,
+                                                        const Span<int> word_match_map,
+                                                        int start = 0)
 {
+  const Span<StringRef> words = item.normalized_words;
  if (start >= words.size()) {
-    return false;
+    return std::nullopt;
  }

-  r_word_is_matched.fill(false);
+  InitialsMatch match;

  size_t query_index = 0;
  int word_index = start;
@ -194,10 +211,9 @@ static bool match_word_initials(StringRef query,
        if (first_found_word_index >= 0) {
          /* Try starting to match at another word. In some cases one can still find matches this
           * way. */
-          return match_word_initials(
-              query, words, word_match_map, r_word_is_matched, first_found_word_index + 1);
+          return match_word_initials(query, item, word_match_map, first_found_word_index + 1);
        }
-        return false;
+        return std::nullopt;
      }

      /* Skip words that the caller does not want us to use. */
@ -213,7 +229,7 @@ static bool match_word_initials(StringRef query,
        const uint32_t char_unicode = BLI_str_utf8_as_unicode_step_safe(
            word.data(), word.size(), &char_index);
        if (query_unicode == char_unicode) {
-          r_word_is_matched[word_index] = true;
+          match.matched_word_indices.append(word_index);
          if (first_found_word_index == -1) {
            first_found_word_index = word_index;
          }
@ -226,15 +242,22 @@ static bool match_word_initials(StringRef query,
      char_index = 0;
    }
  }
-  return true;
+  /* Check if we can find a better match that starts at a later word. */
+  if (std::optional<InitialsMatch> sub_match = match_word_initials(
+          query, item, word_match_map, first_found_word_index + 1))
+  {
+    if (sub_match->better_than(match, item)) {
+      return sub_match;
+    }
+  }
+  return match;
 }

 /**
 * The "best" is chosen with combination of word weights and word length.
 */
 static int get_best_word_index_that_startswith(StringRef query,
-                                               Span<StringRef> words,
-                                               Span<float> word_weights,
+                                               const SearchItem &item,
                                               Span<int> word_match_map,
                                               Span<StringRef> remaining_query_words)
 {
@ -260,20 +283,29 @@ static int get_best_word_index_that_startswith(StringRef query,

  int best_word_size = INT32_MAX;
  int best_word_index = -1;
-  int best_word_weight = 0.0f;
-  for (const int i : words.index_range()) {
+  bool best_word_in_main_group = false;
+  for (const int i : item.normalized_words.index_range()) {
    if (word_match_map[i] != unused_word) {
      continue;
    }
-    StringRef word = words[i];
-    const float word_weight = word_weights[i];
+    StringRef word = item.normalized_words[i];
+    const bool word_in_main_group = item.word_group_ids[i] == item.main_group_id;
    if (word.startswith(query)) {
-      if ((use_shortest_match && word.size() < best_word_size) ||
-          (word.size() == best_word_size && word_weight > best_word_weight))
-      {
+      bool found_new_best = false;
+      if (use_shortest_match) {
+        if (word.size() < best_word_size) {
+          found_new_best = true;
+        }
+      }
+      else {
+        if (!best_word_in_main_group) {
+          found_new_best = true;
+        }
+      }
+      if (found_new_best) {
        best_word_index = i;
        best_word_size = word.size();
-        best_word_weight = word_weight;
+        best_word_in_main_group = word_in_main_group;
      }
    }
  }
@ -304,12 +336,11 @@ static int get_word_index_that_fuzzy_matches(StringRef query,
 * return value indicates how good the match is. The higher the value, the better the match.
 */
 static std::optional<float> score_query_against_words(Span<StringRef> query_words,
-                                                      Span<StringRef> result_words,
-                                                      Span<float> result_word_weights)
+                                                      const SearchItem &item)
 {
  /* A mapping from #result_words to #query_words. It's mainly used to determine if a word has been
   * matched already to avoid matching it again. */
-  Array<int, 64> word_match_map(result_words.size(), unused_word);
+  Array<int, 64> word_match_map(item.normalized_words.size(), unused_word);

  /* Start with some high score, because otherwise the final score might become negative. */
  float total_match_score = 1000;
@ -319,28 +350,25 @@ static std::optional<float> score_query_against_words(Span<StringRef> query_word
    {
      /* Check if any result word begins with the query word. */
      const int word_index = get_best_word_index_that_startswith(
-          query_word,
-          result_words,
-          result_word_weights,
-          word_match_map,
-          query_words.drop_front(query_word_index + 1));
+          query_word, item, word_match_map, query_words.drop_front(query_word_index + 1));
      if (word_index >= 0) {
-        total_match_score += 10 * result_word_weights[word_index];
+        /* Give a match in the main group higher priority. */
+        const bool is_main_group = item.word_group_ids[word_index] == item.main_group_id;
+        total_match_score += is_main_group ? 10 : 9;
        word_match_map[word_index] = query_word_index;
        continue;
      }
    }
    {
      /* Try to match against word initials. */
-      Array<bool, 64> matched_words(result_words.size());
-      const bool success = match_word_initials(
-          query_word, result_words, word_match_map, matched_words);
-      if (success) {
-        total_match_score += 3;
-        for (const int i : result_words.index_range()) {
-          if (matched_words[i]) {
-            word_match_map[i] = query_word_index;
-          }
+      if (std::optional<InitialsMatch> match = match_word_initials(
+              query_word, item, word_match_map)) {
+        /* If all matched words are in the main group, give the match a higher priority. */
+        bool all_main_group_matches = match->count_main_group_matches(item) ==
+                                      match->matched_word_indices.size();
+        total_match_score += all_main_group_matches ? 4 : 3;
+        for (const int i : match->matched_word_indices) {
+          word_match_map[i] = query_word_index;
        }
        continue;
      }
@ -349,7 +377,7 @@ static std::optional<float> score_query_against_words(Span<StringRef> query_word
      /* Fuzzy match against words. */
      int error_count = 0;
      const int word_index = get_word_index_that_fuzzy_matches(
-          query_word, result_words, word_match_map, &error_count);
+          query_word, item.normalized_words, word_match_map, &error_count);
      if (word_index >= 0) {
        total_match_score += 3 - error_count;
        word_match_map[word_index] = query_word_index;
@ -384,7 +412,7 @@ static std::optional<float> score_query_against_words(Span<StringRef> query_word
 void extract_normalized_words(StringRef str,
                              LinearAllocator<> &allocator,
                              Vector<StringRef, 64> &r_words,
-                              Vector<float, 64> &r_word_weights)
+                              Vector<int, 64> &r_word_group_ids)
 {
  const uint32_t unicode_space = uint32_t(' ');
  const uint32_t unicode_slash = uint32_t('/');
@ -407,7 +435,7 @@ void extract_normalized_words(StringRef str,
  BLI_str_tolower_ascii(mutable_copy, str_size_in_bytes);

  /* Iterate over all unicode code points to split individual words. */
-  int current_section = 0;
+  int group_id = 0;
  bool is_in_word = false;
  size_t word_start = 0;
  size_t offset = 0;
@ -416,13 +444,13 @@ void extract_normalized_words(StringRef str,
    uint32_t unicode = BLI_str_utf8_as_unicode_step_safe(str.data(), str.size(), &size);
    size -= offset;
    if (unicode == unicode_right_triangle) {
-      current_section++;
+      group_id++;
    }
    if (is_separator(unicode)) {
      if (is_in_word) {
        const StringRef word = str_copy.substr(int(word_start), int(offset - word_start));
        r_words.append(word);
-        section_indices.append(current_section);
+        r_word_group_ids.append(group_id);
        is_in_word = false;
      }
    }
@ -438,28 +466,23 @@ void extract_normalized_words(StringRef str,
  if (is_in_word) {
    const StringRef word = str_copy.drop_prefix(int(word_start));
    r_words.append(word);
-    section_indices.append(current_section);
-  }
-
-  for (const int i : section_indices.index_range()) {
-    const int section = section_indices[i];
-    /* Give the last section a higher weight, because that's what is highlighted in the UI. */
-    const float word_weight = section == current_section ? 1.0f : 0.9f;
-    r_word_weights.append(word_weight);
+    r_word_group_ids.append(group_id);
  }
 }

 void StringSearchBase::add_impl(const StringRef str, void *user_data, const int weight)
 {
  Vector<StringRef, 64> words;
-  Vector<float, 64> word_weights;
-  string_search::extract_normalized_words(str, allocator_, words, word_weights);
+  Vector<int, 64> word_group_ids;
+  string_search::extract_normalized_words(str, allocator_, words, word_group_ids);
  const int recent_time = recent_cache_ ?
                              recent_cache_->logical_time_by_str.lookup_default(str, -1) :
                              -1;
+  const int main_group_id = word_group_ids.is_empty() ? 0 : word_group_ids.last();
  items_.append({user_data,
                 allocator_.construct_array_copy(words.as_span()),
-                 allocator_.construct_array_copy(word_weights.as_span()),
+                 allocator_.construct_array_copy(word_group_ids.as_span()),
+                 main_group_id,
                 int(str.size()),
                 weight,
                 recent_time});
@ -469,17 +492,17 @@ Vector<void *> StringSearchBase::query_impl(const StringRef query) const
 {
  LinearAllocator<> allocator;
  Vector<StringRef, 64> query_words;
-  /* The word weights are not actually used for the query. */
-  Vector<float, 64> word_weights;
-  string_search::extract_normalized_words(query, allocator, query_words, word_weights);
+  /* This is just a dummy value that is not used for the query. */
+  Vector<int, 64> word_group_ids;
+  string_search::extract_normalized_words(query, allocator, query_words, word_group_ids);

  /* Compute score of every result. */
  Array<std::optional<float>> all_scores(items_.size());
  threading::parallel_for(items_.index_range(), 256, [&](const IndexRange range) {
    for (const int i : range) {
      const SearchItem &item = items_[i];
-      const std::optional<float> score = string_search::score_query_against_words(
-          query_words, item.normalized_words, item.word_weight_factors);
+      const std::optional<float> score = string_search::score_query_against_words(query_words,
+                                                                                  item);
      all_scores[i] = score;
    }
  });
--- a/source/blender/blenlib/tests/BLI_string_search_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_search_test.cc
@ -42,19 +42,25 @@ TEST(string_search, extract_normalized_words)
 {
  LinearAllocator<> allocator;
  Vector<StringRef, 64> words;
-  Vector<float, 64> word_weights;
+  Vector<int, 64> word_group_ids;
  extract_normalized_words("hello world" UI_MENU_ARROW_SEP "test   another test" UI_MENU_ARROW_SEP
                           " 3",
                           allocator,
                           words,
-                           word_weights);
+                           word_group_ids);
  EXPECT_EQ(words.size(), 6);
  EXPECT_EQ(words[0], "hello");
+  EXPECT_EQ(word_group_ids[0], 0);
  EXPECT_EQ(words[1], "world");
+  EXPECT_EQ(word_group_ids[1], 0);
  EXPECT_EQ(words[2], "test");
+  EXPECT_EQ(word_group_ids[2], 1);
  EXPECT_EQ(words[3], "another");
+  EXPECT_EQ(word_group_ids[3], 1);
  EXPECT_EQ(words[4], "test");
+  EXPECT_EQ(word_group_ids[4], 1);
  EXPECT_EQ(words[5], "3");
+  EXPECT_EQ(word_group_ids[5], 2);
 }

 }  // namespace blender::string_search::tests