Added case matching

2006-01-08 16:30:31 +00:00 · 2006-01-08 16:30:31 +00:00 · f578ba68f5
parent 5a63c731f0
commit f578ba68f5
2 changed files with 498 additions and 2 deletions
--- a/gnoemoe/widgets/gm-searchable.c
+++ b/gnoemoe/widgets/gm-searchable.c
@ -1,5 +1,8 @@
 #include <string.h>
 #include "gm-searchable.h"
 #define GTK_TEXT_UNKNOWN_CHAR 0xFFFC
 GType 
 gm_searchable_get_type() {
 	static GType searchable_type = 0;
@ -25,6 +28,493 @@ gm_searchable_get_type() {
 }
 /* Searching implementations */
 /*
  * Translate a character offset measured in the case-folded, NFD-normalized
  * form of str into a pointer inside the original str.
  *
  * Each character of str is folded and decomposed on its own, and the number
  * of characters it expands to is subtracted from offset; the walk stops once
  * offset has been used up.
  *
  * str:    NUL-terminated UTF-8 text.
  * offset: character offset in the folded/decomposed text.
  *
  * Returns a pointer into str (not a copy). If offset is larger than the
  * folded/decomposed length of str, the pointer to the terminating NUL is
  * returned instead of walking past the end of the string.
  */
 static const gchar *
 pointer_from_offset_skipping_decomp (const gchar *str, gint offset)
 {
 	const gchar *p = str;
 	/* Also stop at the terminator: without this check an offset that is
 	 * too large keeps advancing p beyond the end of the buffer. */
 	while (offset > 0 && *p != '\0')
 	{
 		const gchar *q = g_utf8_next_char (p);
 		gchar *casefold = g_utf8_casefold (p, q - p);
 		gchar *normal = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 		/* one source character may count as several decomposed ones */
 		offset -= g_utf8_strlen (normal, -1);
 		g_free (casefold);
 		g_free (normal);
 		p = q;
 	}
 	return p;
 }
 /*
  * Find the first occurrence of needle in haystack, comparing against the
  * case-folded, NFD-normalized form of haystack.
  *
  * needle is compared byte for byte and is not folded here, so the caller
  * has to pass it already case-folded and normalized.
  *
  * Returns a pointer into the original haystack at the start of the match,
  * haystack itself when needle is empty, or NULL when nothing matches.
  */
 static const gchar *
 g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
 {
 	gchar *folded;
 	gchar *norm_haystack;
 	const gchar *cursor;
 	const gchar *match = NULL;
 	gsize needle_chars;
 	gsize haystack_chars;
 	gsize needle_bytes;
 	gint char_index;
 	g_return_val_if_fail (haystack != NULL, NULL);
 	g_return_val_if_fail (needle != NULL, NULL);
 	folded = g_utf8_casefold (haystack, -1);
 	norm_haystack = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
 	g_free (folded);
 	needle_chars = g_utf8_strlen (needle, -1);
 	haystack_chars = g_utf8_strlen (norm_haystack, -1);
 	if (needle_chars == 0)
 	{
 		/* an empty needle matches at the very beginning */
 		match = haystack;
 	}
 	else if (haystack_chars >= needle_chars)
 	{
 		needle_bytes = strlen (needle);
 		char_index = 0;
 		/* walk the folded text one character at a time; char_index is
 		 * the character offset of cursor inside the folded text */
 		for (cursor = norm_haystack; *cursor; cursor = g_utf8_next_char (cursor))
 		{
 			if (strncmp (cursor, needle, needle_bytes) == 0)
 			{
 				match = pointer_from_offset_skipping_decomp (haystack, char_index);
 				break;
 			}
 			char_index++;
 		}
 	}
 	g_free (norm_haystack);
 	return match;
 }
 /*
  * Find the last occurrence of needle in haystack, comparing against the
  * case-folded, NFD-normalized form of haystack.
  *
  * needle is compared byte for byte and is not folded here, so the caller
  * has to pass it already case-folded and normalized.
  *
  * Returns a pointer into the original haystack at the start of the match,
  * haystack itself when needle is empty, or NULL when nothing matches.
  */
 static const gchar *
 g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
 {
 	gsize needle_len;
 	gsize haystack_len;
 	const gchar *ret = NULL;
 	gchar *p;
 	gchar *casefold;
 	gchar *caseless_haystack;
 	gint i;
 	g_return_val_if_fail (haystack != NULL, NULL);
 	g_return_val_if_fail (needle != NULL, NULL);
 	casefold = g_utf8_casefold (haystack, -1);
 	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 	g_free (casefold);
 	needle_len = g_utf8_strlen (needle, -1);
 	haystack_len = g_utf8_strlen (caseless_haystack, -1);
 	if (needle_len == 0)
 	{
 		ret = (gchar *)haystack;
 		goto finally_1;
 	}
 	if (haystack_len < needle_len)
 	{
 		ret = NULL;
 		goto finally_1;
 	}
 	/* Start at the last position where the needle could still fit and
 	 * walk towards the beginning; i is the character offset of p. */
 	i = haystack_len - needle_len;
 	p = g_utf8_offset_to_pointer (caseless_haystack, i);
 	/* from here on needle_len is a byte count, as strncmp needs */
 	needle_len = strlen (needle);
 	for (;;)
 	{
 		if (strncmp (p, needle, needle_len) == 0)
 		{
 			ret = pointer_from_offset_skipping_decomp (haystack, i);
 			break;
 		}
 		/* Stop before stepping back from the first character:
 		 * g_utf8_prev_char() would read bytes in front of the buffer
 		 * and produce a pointer outside the allocation. */
 		if (p == caseless_haystack)
 			break;
 		p = g_utf8_prev_char (p);
 		i--;
 	}
 finally_1:
 	g_free (caseless_haystack);
 	return ret;
 }
 /*
  * Check whether s2 is a prefix of s1 once both have been case-folded and
  * NFD-normalized.
  *
  * s1, s2: UTF-8 strings; must not be NULL.
  * n1, n2: byte lengths of s1 and s2 to consider; both must be > 0.
  *
  * Returns TRUE when the normalized s1 starts with the normalized s2,
  * FALSE otherwise (including when a precondition fails).
  */
 static gboolean
 g_utf8_caselessnmatch (const char *s1, const char *s2,
 		       gssize n1, gssize n2)
 {
 	gchar *folded;
 	gchar *norm1;
 	gchar *norm2;
 	gint bytes1;
 	gint bytes2;
 	gboolean is_prefix;
 	g_return_val_if_fail (s1 != NULL, FALSE);
 	g_return_val_if_fail (s2 != NULL, FALSE);
 	g_return_val_if_fail (n1 > 0, FALSE);
 	g_return_val_if_fail (n2 > 0, FALSE);
 	folded = g_utf8_casefold (s1, n1);
 	norm1 = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
 	g_free (folded);
 	folded = g_utf8_casefold (s2, n2);
 	norm2 = g_utf8_normalize (folded, -1, G_NORMALIZE_NFD);
 	g_free (folded);
 	bytes1 = strlen (norm1);
 	bytes2 = strlen (norm2);
 	/* s1 must be at least as long as s2 to be able to start with it */
 	is_prefix = (bytes1 >= bytes2) &&
 		    (strncmp (norm1, norm2, bytes2) == 0);
 	g_free (norm1);
 	g_free (norm2);
 	return is_prefix;
 }
 /*
  * Advance iter by count characters, not counting GTK_TEXT_UNKNOWN_CHAR
  * positions.
  *
  * iter:        iterator to move; modified in place.
  * count:       number of characters to advance; must be >= 0.
  * skip_decomp: when TRUE, count is taken to be measured in NFD-decomposed
  *              characters, so a buffer character that decomposes into
  *              several characters uses up that many units of count.
  *
  * Stops early when the end of the buffer is reached.
  */
 static void
 forward_chars_with_skipping (GtkTextIter *iter,
 			     gint         count,
 			     gboolean     skip_decomp)
 {
 	gint remaining;
 	g_return_if_fail (count >= 0);
 	for (remaining = count; remaining > 0; )
 	{
 		gunichar ch;
 		/* Minimal workaround to avoid the infinite loop of bug #168247.
 		 * It doesn't fix the problem, just the symptom...
 		 */
 		if (gtk_text_iter_is_end (iter))
 			return;
 		ch = gtk_text_iter_get_char (iter);
 		if (ch == GTK_TEXT_UNKNOWN_CHAR)
 		{
 			/* step over it without using up any of the count */
 			gtk_text_iter_forward_char (iter);
 			continue;
 		}
 		/* Invisible characters are not skipped: the
 		 * _gtk_text_btree_char_is_invisible (iter) check is disabled. */
 		if (skip_decomp)
 		{
 			/* Being UTF-8 correct is painful: this accounts for the
 			 * extra offsets coming from canonical decompositions of
 			 * characters (e.g. accented characters) which
 			 * g_utf8_normalize() performs. */
 			gchar utf8[6];
 			gint utf8_len;
 			gchar *decomposed;
 			utf8_len = g_unichar_to_utf8 (ch, utf8);
 			decomposed = g_utf8_normalize (utf8, utf8_len, G_NORMALIZE_NFD);
 			remaining -= (g_utf8_strlen (decomposed, -1) - 1);
 			g_free (decomposed);
 		}
 		gtk_text_iter_forward_char (iter);
 		--remaining;
 	}
 }
 /*
  * Try to match the NULL-terminated array of search lines against the
  * buffer text beginning at start, one buffer line per search line.
  *
  * start:       position where the current buffer line is read from.
  * lines:       remaining search lines; an empty or NULL first entry means
  *              everything has been matched.
  * match_start: when non-NULL this call handles the first search line, which
  *              may match anywhere in the buffer line, and the position of
  *              the match is stored here. When NULL the search line has to
  *              match at the beginning of the buffer line.
  * match_end:   when non-NULL, receives the position after the text matched
  *              so far.
  *
  * Returns TRUE when every search line matched, FALSE otherwise.
  */
 static gboolean
 lines_match (const GtkTextIter *start,
 		const gchar **lines,
 		GtkTextIter *match_start,
 		GtkTextIter *match_end) {
 	GtkTextIter cursor;
 	gchar *text;
 	const gchar *hit = NULL;
 	gint lead_chars;
 	gboolean first_line = (match_start != NULL);
 	/* nothing left to match: report an empty match at start */
 	if (*lines == NULL || **lines == '\0') {
 		if (first_line)
 			*match_start = *start;
 		if (match_end)
 			*match_end = *start;
 		return TRUE;
 	}
 	cursor = *start;
 	gtk_text_iter_forward_line(&cursor);
 	/* No more text in buffer, but *lines is nonempty */
 	if (gtk_text_iter_equal(start, &cursor))
 		return FALSE;
 	text = gtk_text_iter_get_visible_text(start, &cursor);
 	if (first_line) {
 		hit = g_utf8_strcasestr(text, *lines);
 	} else if (g_utf8_caselessnmatch(text, *lines, strlen (text),
 					 strlen (*lines))) {
 		/* later lines have to match from the start of the line */
 		hit = text;
 	}
 	if (hit == NULL) {
 		g_free(text);
 		return FALSE;
 	}
 	/* character offset of the match inside the buffer line */
 	lead_chars = g_utf8_strlen(text, hit - text);
 	g_free(text);
 	/* move to the start of the matched text */
 	cursor = *start;
 	forward_chars_with_skipping(&cursor, lead_chars, FALSE);
 	if (first_line)
 		*match_start = cursor;
 	/* move to the end of the matched text */
 	forward_chars_with_skipping(&cursor, g_utf8_strlen (lines[0], -1), TRUE);
 	if (match_end)
 		*match_end = cursor;
 	/* match_start is passed as NULL from here on: the start has already
 	 * been found and the remaining lines must match at line start. */
 	return lines_match(&cursor, lines + 1, NULL, match_end);
 }
 /*
  * Backward counterpart of lines_match(): match the first search line
  * against the text between the beginning of a line and start, then match
  * any remaining search lines forward with lines_match().
  *
  * start:       position that ends the text examined. When it is at the
  *              beginning of a line, the previous line is examined instead.
  * lines:       NULL-terminated array of search lines; an empty or NULL
  *              first entry matches immediately.
  * match_start: when non-NULL, the last occurrence of the first search line
  *              in the examined text is looked for and its position stored
  *              here. When NULL the search line has to match at the start
  *              of the examined text.
  * match_end:   when non-NULL, receives the position after the matched text.
  *
  * Returns TRUE when every search line matched, FALSE otherwise.
  */
 static gboolean
 backward_lines_match (const GtkTextIter *start,
 		      const gchar      **lines,
 		      GtkTextIter       *match_start,
 		      GtkTextIter       *match_end) {
 	GtkTextIter span_end;
 	GtkTextIter cursor;
 	gchar *text;
 	const gchar *hit = NULL;
 	gint lead_chars;
 	/* nothing to match: report an empty match at start */
 	if (*lines == NULL || **lines == '\0')
 	{
 		if (match_start)
 			*match_start = *start;
 		if (match_end)
 			*match_end = *start;
 		return TRUE;
 	}
 	span_end = *start;
 	cursor = *start;
 	/* put cursor at the beginning of the text to examine */
 	if (gtk_text_iter_get_line_offset (&cursor) != 0)
 		gtk_text_iter_set_line_offset (&cursor, 0);
 	else if (!gtk_text_iter_backward_line (&cursor))
 		return FALSE;
 	text = gtk_text_iter_get_visible_text (&cursor, &span_end);
 	if (match_start != NULL)
 	{
 		/* first search line: take the last occurrence */
 		hit = g_utf8_strrcasestr (text, *lines);
 	}
 	else if (g_utf8_caselessnmatch (text, *lines, strlen (text),
 					strlen (*lines)))
 	{
 		/* otherwise the match has to begin at the start of the text */
 		hit = text;
 	}
 	if (hit == NULL)
 	{
 		g_free (text);
 		return FALSE;
 	}
 	/* character offset of the match inside the examined text */
 	lead_chars = g_utf8_strlen (text, hit - text);
 	g_free (text);
 	/* move to the start of the matched text */
 	forward_chars_with_skipping (&cursor, lead_chars, FALSE);
 	if (match_start != NULL)
 		*match_start = cursor;
 	/* move to the end of the matched text */
 	forward_chars_with_skipping (&cursor, g_utf8_strlen (lines[0], -1), TRUE);
 	if (match_end)
 		*match_end = cursor;
 	/* The remaining search lines are matched forward; NULL is passed for
 	 * match_start so that lines_match() anchors them at line start. */
 	return lines_match (&cursor, lines + 1, NULL, match_end);
 }
 /*
  * Split string at each occurrence of delimiter, keeping the delimiter at
  * the end of every piece, and return the pieces case-folded and
  * NFD-normalized.
  *
  * string:     text to split; must not be NULL.
  * delimiter:  separator to split at; must not be NULL.
  * max_tokens: maximum number of splits to perform; values below 1 mean
  *             "no limit". Text left over after the last split becomes a
  *             final piece if it is not empty.
  *
  * Returns a newly allocated NULL-terminated array of newly allocated
  * strings, or NULL when a precondition fails.
  */
 static gchar **
 strbreakup(const char *string, const char *delimiter,
 		gint max_tokens) {
 	GSList *pieces = NULL;
 	GSList *node;
 	gchar **result;
 	gchar *folded;
 	gchar *piece;
 	const char *hit;
 	guint delimiter_len;
 	guint slot;
 	guint count = 1;	/* one slot for the terminating NULL */
 	g_return_val_if_fail (string != NULL, NULL);
 	g_return_val_if_fail (delimiter != NULL, NULL);
 	if (max_tokens < 1)
 		max_tokens = G_MAXINT;
 	delimiter_len = strlen(delimiter);
 	hit = strstr(string, delimiter);
 	while (hit != NULL) {
 		/* the piece runs up to and including the delimiter */
 		guint len = hit - string + delimiter_len;
 		piece = g_new(gchar, len + 1);
 		memcpy(piece, string, len);
 		piece[len] = 0;
 		folded = g_utf8_casefold(piece, -1);
 		g_free(piece);
 		piece = g_utf8_normalize(folded, -1, G_NORMALIZE_NFD);
 		g_free(folded);
 		pieces = g_slist_prepend(pieces, piece);
 		count++;
 		string = hit + delimiter_len;
 		hit = strstr(string, delimiter);
 		if (--max_tokens == 0)
 			break;
 	}
 	/* whatever follows the last split is the final piece */
 	if (*string) {
 		count++;
 		folded = g_utf8_casefold(string, -1);
 		piece = g_utf8_normalize(folded, -1, G_NORMALIZE_NFD);
 		g_free(folded);
 		pieces = g_slist_prepend(pieces, piece);
 	}
 	/* the list holds the pieces in reverse order, so fill the array
 	 * from the back */
 	result = g_new(gchar*, count);
 	slot = count - 1;
 	result[slot] = NULL;
 	for (node = pieces; node != NULL; node = node->next)
 		result[--slot] = node->data;
 	g_slist_free (pieces);
 	return result;
 }
 /* Pointer to a function that takes a const GtkTextIter and returns a
  * gboolean. NOTE(review): no use of this type is visible in this part of
  * the file; confirm it is still needed. */
 typedef gboolean (* search_fun)(const GtkTextIter *iter);
 /*
  * Case-insensitive search for str, which may span several lines.
  *
  * text_view: not used by this function.
  * str:       text to look for; must not be NULL. It is split at "\n" and
  *            each line is case-folded and normalized before matching.
  * flags:     with GM_SEARCHABLE_SEARCH_FORWARDS set, the search runs
  *            forward from *end; otherwise it runs backward from *start.
  * start:     in: start position for a backward search.
  *            out: start of the match; must not be NULL.
  * end:       in: start position for a forward search.
  *            out: end of the match; must not be NULL.
  *
  * Returns TRUE and stores the match in *start / *end when str was found.
  * Returns FALSE otherwise, leaving *start and *end unchanged.
  */
 gboolean
 gm_searchable_text_view_search_case_insensitive(GtkTextView *text_view, gchar
 		const *str, GmSearchableSearchFlags flags, GtkTextIter *start, 
 		GtkTextIter *end) {
 	gchar **lines;
 	GtkTextIter search;
 	GtkTextIter found_start;
 	GtkTextIter found_end;
 	gboolean retval = FALSE;
 	/* A NULL str would make strbreakup() return NULL, which the matching
 	 * helpers dereference. */
 	g_return_val_if_fail (str != NULL, FALSE);
 	g_return_val_if_fail (start != NULL, FALSE);
 	g_return_val_if_fail (end != NULL, FALSE);
 	lines = strbreakup(str, "\n", -1);
 	/* Match into local iterators: the helpers store positions as soon as
 	 * the first line matches, even when a later line then fails, and such
 	 * partial results must not reach the caller. */
 	found_start = *start;
 	found_end = *end;
 	if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
 		search = *end;
 		do {
 			/* This loop has an inefficient worst-case, where
 			 * gtk_text_iter_get_text () is called repeatedly on
 			 * a single line.
 			 */
 			if (lines_match(&search, (const gchar**)lines,
 					&found_start, &found_end)) {
 				retval = TRUE;
 				break;
 			}
 		} while (gtk_text_iter_forward_line(&search));
 	} else {
 		search = *start;
 		while (TRUE)
 		{
 			/* This loop has an inefficient worst-case, where
 			 * gtk_text_iter_get_text () is called repeatedly on
 			 * a single line.
 			 */
 			if (backward_lines_match (&search, (const gchar**)lines,
 						  &found_start, &found_end)) {
 				retval = TRUE;
 				break;
 			}
 			/* step back to the start of this line, or to the
 			 * previous line when already there */
 			if (gtk_text_iter_get_line_offset (&search) == 0)
 			{
 				if (!gtk_text_iter_backward_line (&search))
 					break;
 			}
 			else
 			{
 				gtk_text_iter_set_line_offset (&search, 0);
 			}
 		}
 	}
 	if (retval) {
 		*start = found_start;
 		*end = found_end;
 	}
 	g_strfreev (lines);
 	return retval;
 }
 gboolean
 gm_searchable_text_view_find_next(GtkTextView *text_view, const gchar *str, 
 		GmSearchableSearchFlags flags) {
@ -59,7 +549,12 @@ gm_searchable_text_view_find_next(GtkTextView *text_view, const gchar *str,
 			start = end;
 		}
-		if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
+		if (flags & GM_SEARCHABLE_SEARCH_CASE_INSENSITIVE) {
 			match_start = start;
 			match_end = end;
 			found = gm_searchable_text_view_search_case_insensitive(text_view,
 					str, flags, &match_start, &match_end);
 		} else if (flags & GM_SEARCHABLE_SEARCH_FORWARDS) {
 			found = gtk_text_iter_forward_search(&end, str, 
 					GTK_TEXT_SEARCH_VISIBLE_ONLY | GTK_TEXT_SEARCH_TEXT_ONLY, 
 					&match_start, &match_end, NULL);
--- a/gnoemoe/widgets/gm-searchable.h
+++ b/gnoemoe/widgets/gm-searchable.h
@ -20,7 +20,8 @@ typedef struct _GmSearchableInterface GmSearchableInterface;
 typedef enum _GmSearchableSearchFlags {
 	GM_SEARCHABLE_SEARCH_NONE = 0,
 	GM_SEARCHABLE_SEARCH_FORWARDS = 1 << 0,
-	GM_SEARCHABLE_SEARCH_BACKWARDS = 1 << 1
+	GM_SEARCHABLE_SEARCH_BACKWARDS = 1 << 1,
 	GM_SEARCHABLE_SEARCH_CASE_INSENSITIVE = 1 << 2
 } GmSearchableSearchFlags;
 struct _GmSearchableInterface {