From f578ba68f529581fc165c9c5f7076287a8e701de Mon Sep 17 00:00:00 2001
From: Jesse van den Kieboom
Date: Sun, 8 Jan 2006 16:30:31 +0000
Subject: [PATCH] Added case matching

---
 gnoemoe/widgets/gm-searchable.c | 562 +++++++++++++++++++++++++++++++-
 gnoemoe/widgets/gm-searchable.h |   3 +-
 2 files changed, 563 insertions(+), 2 deletions(-)

diff --git a/gnoemoe/widgets/gm-searchable.c b/gnoemoe/widgets/gm-searchable.c
index 882e4d9..3fa0194 100644
--- a/gnoemoe/widgets/gm-searchable.c
+++ b/gnoemoe/widgets/gm-searchable.c
@@ -1,5 +1,8 @@
+#include <string.h>
 #include "gm-searchable.h"
 
+#define GTK_TEXT_UNKNOWN_CHAR 0xFFFC
+
 GType
 gm_searchable_get_type() {
 	static GType searchable_type = 0;
@@ -25,6 +28,558 @@ gm_searchable_get_type() {
 }
 
 /* Searching implementations */
+/*
+ * Map a character offset in the casefolded, NFD-normalized form of str
+ * back to a pointer into str itself.  Every source character is folded and
+ * decomposed on its own, so a character that expands to several code
+ * points uses up that many units of offset.
+ */
+static const gchar *
+pointer_from_offset_skipping_decomp (const gchar *str, gint offset)
+{
+	gchar *casefold, *normal;
+	const gchar *p, *q;
+
+	p = str;
+	while (offset > 0 && *p != '\0')
+	{
+		q = g_utf8_next_char (p);
+		casefold = g_utf8_casefold (p, q - p);
+		normal = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+		offset -= g_utf8_strlen (normal, -1);
+		g_free (casefold);
+		g_free (normal);
+		p = q;
+	}
+	return p;
+}
+
+/*
+ * Find the first case-insensitive occurrence of needle in haystack.
+ * needle is compared byte for byte against the casefolded, NFD-normalized
+ * haystack, so it must already be in that form (strbreakup() produces it).
+ * Returns a pointer into haystack, or NULL when there is no match.
+ */
+static const gchar *
+g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
+{
+	gsize needle_len;
+	gsize haystack_len;
+	const gchar *ret = NULL;
+	gchar *p;
+	gchar *casefold;
+	gchar *caseless_haystack;
+	gint i;
+
+	g_return_val_if_fail (haystack != NULL, NULL);
+	g_return_val_if_fail (needle != NULL, NULL);
+
+	casefold = g_utf8_casefold (haystack, -1);
+	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+	g_free (casefold);
+
+	needle_len = g_utf8_strlen (needle, -1);
+	haystack_len = g_utf8_strlen (caseless_haystack, -1);
+
+	if (needle_len == 0)
+	{
+		ret = (gchar *)haystack;
+		goto finally_1;
+	}
+
+	if (haystack_len < needle_len)
+	{
+		ret = NULL;
+		goto finally_1;
+	}
+
+	p = (gchar*)caseless_haystack;
+	needle_len = strlen (needle);
+	i = 0;
+
+	while (*p)
+	{
+		if ((strncmp (p, needle, needle_len) == 0))
+		{
+			ret = pointer_from_offset_skipping_decomp (haystack, i);
+			goto finally_1;
+		}
+
+		p = g_utf8_next_char (p);
+		i++;
+	}
+
+finally_1:
+	g_free (caseless_haystack);
+
+	return ret;
+}
+
+/*
+ * Like g_utf8_strcasestr(), but returns the last occurrence of needle in
+ * haystack by scanning backwards from the end.
+ */
+static const gchar *
+g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
+{
+	gsize needle_len;
+	gsize haystack_len;
+	const gchar *ret = NULL;
+	gchar *p;
+	gchar *casefold;
+	gchar *caseless_haystack;
+	gint i;
+
+	g_return_val_if_fail (haystack != NULL, NULL);
+	g_return_val_if_fail (needle != NULL, NULL);
+
+	casefold = g_utf8_casefold (haystack, -1);
+	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+	g_free (casefold);
+
+	needle_len = g_utf8_strlen (needle, -1);
+	haystack_len = g_utf8_strlen (caseless_haystack, -1);
+
+	if (needle_len == 0)
+	{
+		ret = (gchar *)haystack;
+		goto finally_1;
+	}
+
+	if (haystack_len < needle_len)
+	{
+		ret = NULL;
+		goto finally_1;
+	}
+
+	i = haystack_len - needle_len;
+	p = g_utf8_offset_to_pointer (caseless_haystack, i);
+	needle_len = strlen (needle);
+
+	for (;;)
+	{
+		if (strncmp (p, needle, needle_len) == 0)
+		{
+			ret = pointer_from_offset_skipping_decomp (haystack, i);
+			goto finally_1;
+		}
+
+		/* Stop at the first character: g_utf8_prev_char() would
+		 * otherwise read bytes in front of the buffer. */
+		if (p == caseless_haystack)
+			break;
+
+		p = g_utf8_prev_char (p);
+		i--;
+	}
+
+finally_1:
+	g_free (caseless_haystack);
+
+	return ret;
+}
+
+/*
+ * Return TRUE if the casefolded, NFD-normalized form of the first n2 bytes
+ * of s2 is a prefix of the casefolded, NFD-normalized form of the first n1
+ * bytes of s1.
+ */
+static gboolean
+g_utf8_caselessnmatch (const char *s1, const char *s2,
+		       gssize n1, gssize n2)
+{
+	gchar *casefold;
+	gchar *normalized_s1;
+	gchar *normalized_s2;
+	gint len_s1;
+	gint len_s2;
+	gboolean ret = FALSE;
+
+	g_return_val_if_fail (s1 != NULL, FALSE);
+	g_return_val_if_fail (s2 != NULL, FALSE);
+	g_return_val_if_fail (n1 > 0, FALSE);
+	g_return_val_if_fail (n2 > 0, FALSE);
+
+	casefold = g_utf8_casefold (s1, n1);
+	normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+	g_free (casefold);
+
+	casefold = g_utf8_casefold (s2, n2);
+	normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+	g_free (casefold);
+
+	len_s1 = strlen (normalized_s1);
+	len_s2 = strlen (normalized_s2);
+
+	if (len_s1 < len_s2)
+		goto finally_2;
+
+	ret = (strncmp (normalized_s1, normalized_s2, len_s2) == 0);
+
+finally_2:
+	g_free (normalized_s1);
+	g_free (normalized_s2);
+
+	return ret;
+}
+
+/*
+ * Advance iter by count characters; GTK_TEXT_UNKNOWN_CHAR is stepped over
+ * without being counted.  With skip_decomp set, count is measured in
+ * NFD-normalized characters: a buffer character that decomposes into
+ * several code points uses up that many units of count.
+ */
+static void
+forward_chars_with_skipping (GtkTextIter *iter,
+			     gint         count,
+			     gboolean     skip_decomp)
+{
+	gint i;
+
+	g_return_if_fail (count >= 0);
+
+	i = count;
+
+	while (i > 0)
+	{
+		gboolean ignored = FALSE;
+
+		/* minimal workaround to avoid the infinite loop of bug #168247.
+		 * It doesn't fix the problem, just the symptom...
+		 */
+		if (gtk_text_iter_is_end (iter))
+			return;
+
+		if (gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
+			ignored = TRUE;
+
+		if (!ignored &&
+		    /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE)
+			ignored = TRUE;
+
+		if (!ignored && skip_decomp)
+		{
+			/* being UTF8 correct sucks; this accounts for extra
+			   offsets coming from canonical decompositions of
+			   UTF8 characters (e.g. accented characters) which
+			   g_utf8_normalize() performs */
+			gchar *normal;
+			gchar buffer[6];
+			gint buffer_len;
+
+			buffer_len = g_unichar_to_utf8 (gtk_text_iter_get_char (iter), buffer);
+			normal = g_utf8_normalize (buffer, buffer_len, G_NORMALIZE_NFD);
+			i -= (g_utf8_strlen (normal, -1) - 1);
+			g_free (normal);
+		}
+
+		gtk_text_iter_forward_char (iter);
+
+		if (!ignored)
+			--i;
+	}
+}
+
+/*
+ * Match lines (a NULL-terminated array of casefolded, NFD-normalized
+ * strings) against the visible buffer text beginning at start, one buffer
+ * line per entry.  With a non-NULL match_start the first entry may occur
+ * anywhere in its line and *match_start receives its position; with NULL
+ * it has to match at the beginning of the line.  *match_end, if non-NULL,
+ * receives the end of the match.  Returns TRUE on a match.
+ */
+static gboolean
+lines_match (const GtkTextIter *start,
+	     const gchar **lines,
+	     GtkTextIter *match_start,
+	     GtkTextIter *match_end) {
+	GtkTextIter next;
+	gchar *line_text;
+	const gchar *found;
+	gint offset;
+
+	if (*lines == NULL || **lines == '\0') {
+		if (match_start)
+			*match_start = *start;
+		if (match_end)
+			*match_end = *start;
+		return TRUE;
+	}
+
+	next = *start;
+	gtk_text_iter_forward_line(&next);
+
+	/* No more text in buffer, but *lines is nonempty */
+	if (gtk_text_iter_equal(start, &next))
+		return FALSE;
+
+	line_text = gtk_text_iter_get_visible_text(start, &next);
+
+	if (match_start) { /* if this is the first line we're matching */
+		found = g_utf8_strcasestr(line_text, *lines);
+	} else {
+		/* If it's not the first line, we have to match from the
+		 * start of the line.
+		 */
+		if (g_utf8_caselessnmatch(line_text, *lines, strlen (line_text),
+				strlen (*lines)))
+			found = line_text;
+		else
+			found = NULL;
+	}
+
+	if (found == NULL) {
+		g_free(line_text);
+		return FALSE;
+	}
+
+	/* Get offset to start of search string */
+	offset = g_utf8_strlen(line_text, found - line_text);
+
+	next = *start;
+
+	/* If match start needs to be returned, set it to the
+	 * start of the search string.
+	 */
+	forward_chars_with_skipping(&next, offset, FALSE);
+	if (match_start) {
+		*match_start = next;
+	}
+
+	/* Go to end of search string */
+	forward_chars_with_skipping(&next, g_utf8_strlen (*lines, -1), TRUE);
+
+	g_free (line_text);
+
+	++lines;
+
+	if (match_end)
+		*match_end = next;
+
+	/* pass NULL for match_start, since we don't need to find the
+	 * start again.
+	 */
+	return lines_match(&next, lines, NULL, match_end);
+}
+
+/*
+ * Backward counterpart of lines_match().  The first entry of lines is
+ * looked up in the visible text between the beginning of the line holding
+ * start (the previous line if start sits at a line start) and start; with
+ * a non-NULL match_start its last occurrence is taken.  The remaining
+ * entries are matched forward by lines_match().  Returns TRUE on a match.
+ */
+static gboolean
+backward_lines_match (const GtkTextIter *start,
+		      const gchar **lines,
+		      GtkTextIter *match_start,
+		      GtkTextIter *match_end) {
+	GtkTextIter line, next;
+	gchar *line_text;
+	const gchar *found;
+	gint offset;
+
+	if (*lines == NULL || **lines == '\0')
+	{
+		if (match_start)
+			*match_start = *start;
+		if (match_end)
+			*match_end = *start;
+		return TRUE;
+	}
+
+	line = next = *start;
+	if (gtk_text_iter_get_line_offset (&next) == 0)
+	{
+		if (!gtk_text_iter_backward_line (&next))
+			return FALSE;
+	}
+	else
+		gtk_text_iter_set_line_offset (&next, 0);
+
+	line_text = gtk_text_iter_get_visible_text (&next, &line);
+
+	if (match_start) /* if this is the first line we're matching */
+	{
+		found = g_utf8_strrcasestr (line_text, *lines);
+	}
+	else
+	{
+		/* If it's not the first line, we have to match from the
+		 * start of the line.
+		 */
+		if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
+					   strlen (*lines)))
+			found = line_text;
+		else
+			found = NULL;
+	}
+
+	if (found == NULL)
+	{
+		g_free (line_text);
+		return FALSE;
+	}
+
+	/* Get offset to start of search string */
+	offset = g_utf8_strlen (line_text, found - line_text);
+
+	forward_chars_with_skipping (&next, offset, FALSE);
+
+	/* If match start needs to be returned, set it to the
+	 * start of the search string.
+	 */
+	if (match_start)
+	{
+		*match_start = next;
+	}
+
+	/* Go to end of search string */
+	forward_chars_with_skipping (&next, g_utf8_strlen (*lines, -1), TRUE);
+
+	g_free (line_text);
+
+	++lines;
+
+	if (match_end)
+		*match_end = next;
+
+	/* try to match the rest of the lines forward, passing NULL
+	 * for match_start so lines_match will try to match the entire
+	 * line */
+	return lines_match (&next, lines, NULL, match_end);
+}
+
+/*
+ * Split string after each occurrence of delimiter; at most max_tokens
+ * pieces are split off (a value below 1 means no limit) and any remainder
+ * forms the last piece.  Every piece keeps its trailing delimiter and is
+ * casefolded and NFD-normalized.  Returns a newly allocated,
+ * NULL-terminated array that the caller releases with g_strfreev().
+ */
+static gchar **
+strbreakup(const char *string, const char *delimiter,
+		gint max_tokens) {
+	GSList *string_list = NULL, *slist;
+	gchar **str_array, *s, *casefold, *new_string;
+	guint i, n = 1;
+
+	g_return_val_if_fail (string != NULL, NULL);
+	g_return_val_if_fail (delimiter != NULL, NULL);
+
+	if (max_tokens < 1)
+		max_tokens = G_MAXINT;
+
+	s = strstr(string, delimiter);
+
+	if (s) {
+		guint delimiter_len = strlen(delimiter);
+
+		do {
+			guint len;
+
+			len = s - string + delimiter_len;
+			/* Fold just the first len bytes; no temporary copy
+			 * of the piece is needed. */
+			casefold = g_utf8_casefold(string, len);
+			new_string = g_utf8_normalize(casefold, -1, G_NORMALIZE_NFD);
+			g_free(casefold);
+			string_list = g_slist_prepend(string_list, new_string);
+			n++;
+			string = s + delimiter_len;
+			s = strstr(string, delimiter);
+		} while (--max_tokens && s);
+	}
+
+	if (*string) {
+		n++;
+		casefold = g_utf8_casefold(string, -1);
+		new_string = g_utf8_normalize(casefold, -1, G_NORMALIZE_NFD);
+		g_free(casefold);
+		string_list = g_slist_prepend(string_list, new_string);
+	}
+
+	str_array = g_new(gchar*, n);
+
+	i = n - 1;
+
+	str_array[i--] = NULL;
+	for (slist = string_list; slist; slist = slist->next)
+		str_array[i--] = slist->data;
+
+	g_slist_free (string_list);
+
+	return str_array;
+}
+
+typedef gboolean (* search_fun)(const GtkTextIter *iter);
+
+/*
+ * Case-insensitive search for str, which may span several lines.  With
+ * GM_SEARCHABLE_SEARCH_FORWARDS set in flags the search starts at *end and
+ * moves towards the end of the buffer; otherwise it starts at *start and
+ * moves backwards.  Returns TRUE and stores the match bounds in *start and
+ * *end on success; on failure the iterators may have been modified.
+ * text_view is not used.
+ */
+gboolean
+gm_searchable_text_view_search_case_insensitive(GtkTextView *text_view, gchar
+		const *str, GmSearchableSearchFlags flags, GtkTextIter *start,
+		GtkTextIter *end) {
+	gchar **lines;
+	GtkTextIter search;
+	gboolean retval = FALSE;
+
+	/* strbreakup() yields NULL for a NULL string, which the matchers
+	 * below would dereference. */
+	g_return_val_if_fail(str != NULL, FALSE);
+
+	lines = strbreakup(str, "\n", -1);
+
+	if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
+		search = *end;
+		do {
+			/* This loop has an inefficient worst-case, where
+			 * gtk_text_iter_get_text () is called repeatedly on
+			 * a single line.
+			 */
+
+			if (lines_match(&search, (const gchar**)lines, start, end)) {
+				retval = TRUE;
+				break;
+			}
+		} while (gtk_text_iter_forward_line(&search));
+	} else {
+		search = *start;
+		while (TRUE)
+		{
+			/* This loop has an inefficient worst-case, where
+			 * gtk_text_iter_get_text () is called repeatedly on
+			 * a single line.
+			 */
+
+			if (backward_lines_match (&search, (const gchar**)lines,
+					start, end)) {
+				retval = TRUE;
+				break;
+			}
+
+			if (gtk_text_iter_get_line_offset (&search) == 0)
+			{
+				if (!gtk_text_iter_backward_line (&search))
+					break;
+			}
+			else
+			{
+				gtk_text_iter_set_line_offset (&search, 0);
+			}
+		}
+	}
+
+	g_strfreev ((gchar**)lines);
+	return retval;
+}
+
 gboolean
 gm_searchable_text_view_find_next(GtkTextView *text_view, const gchar *str,
 		GmSearchableSearchFlags flags) {
@@ -59,7 +614,12 @@ gm_searchable_text_view_find_next(GtkTextView *text_view, const gchar *str,
 		start = end;
 	}
 
-	if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
+	if (flags & GM_SEARCHABLE_SEARCH_CASE_INSENSITIVE) {
+		match_start = start;
+		match_end = end;
+		found = gm_searchable_text_view_search_case_insensitive(text_view,
+				str, flags, &match_start, &match_end);
+	} else if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
 		found = gtk_text_iter_forward_search(&end, str,
 				GTK_TEXT_SEARCH_VISIBLE_ONLY | GTK_TEXT_SEARCH_TEXT_ONLY,
 				&match_start, &match_end, NULL);
diff --git a/gnoemoe/widgets/gm-searchable.h b/gnoemoe/widgets/gm-searchable.h
index 4e059b4..d18f46d 100644
--- a/gnoemoe/widgets/gm-searchable.h
+++ b/gnoemoe/widgets/gm-searchable.h
@@ -20,7 +20,8 @@ typedef struct _GmSearchableInterface GmSearchableInterface;
 typedef enum _GmSearchableSearchFlags {
 	GM_SEARCHABLE_SEARCH_NONE = 0,
 	GM_SEARCHABLE_SEARCH_FORWARDS = 1 << 0,
-	GM_SEARCHABLE_SEARCH_BACKWARDS = 1 << 1
+	GM_SEARCHABLE_SEARCH_BACKWARDS = 1 << 1,
+	GM_SEARCHABLE_SEARCH_CASE_INSENSITIVE = 1 << 2
 } GmSearchableSearchFlags;
 
 struct _GmSearchableInterface {