[Debian-hebrew-package] [svn] r289 - in pkg/hocr/trunk: . debian
examples/hocr examples/hocr-gtk po src
Lior Kaplan
kaplan-guest at costa.debian.org
Sat Dec 24 22:02:04 UTC 2005
Author: kaplan-guest
Date: Sat Dec 24 22:01:57 2005
New Revision: 289
Modified:
pkg/hocr/trunk/configure
pkg/hocr/trunk/configure.in
pkg/hocr/trunk/debian/changelog
pkg/hocr/trunk/examples/hocr-gtk/Makefile.am
pkg/hocr/trunk/examples/hocr-gtk/Makefile.in
pkg/hocr/trunk/examples/hocr-gtk/callbacks.c
pkg/hocr/trunk/examples/hocr-gtk/callbacks.h
pkg/hocr/trunk/examples/hocr-gtk/interface.c
pkg/hocr/trunk/examples/hocr-gtk/interface.h
pkg/hocr/trunk/examples/hocr/Makefile.am
pkg/hocr/trunk/examples/hocr/Makefile.in
pkg/hocr/trunk/libhocr.pc
pkg/hocr/trunk/po/he.gmo
pkg/hocr/trunk/po/he.po
pkg/hocr/trunk/po/hocr-gtk.pot
pkg/hocr/trunk/src/Makefile.am
pkg/hocr/trunk/src/Makefile.in
pkg/hocr/trunk/src/font_layout.c
pkg/hocr/trunk/src/font_recognition.c
pkg/hocr/trunk/src/hocr.c
pkg/hocr/trunk/src/hocr_object.c
pkg/hocr/trunk/src/hocr_pixbuf.c
pkg/hocr/trunk/src/hocr_pixbuf.h
pkg/hocr/trunk/src/page_layout.c
Log:
Merged diffs with version 0.6.5
Modified: pkg/hocr/trunk/configure
==============================================================================
--- pkg/hocr/trunk/configure (original)
+++ pkg/hocr/trunk/configure Sat Dec 24 22:01:57 2005
@@ -1803,7 +1803,7 @@
PACKAGE=hocr
-VERSION=0.6.4
+VERSION=0.6.5
if test "`cd $srcdir && pwd`" != "`pwd`" && test -f $srcdir/config.status; then
{ { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5
@@ -20193,7 +20193,7 @@
fi
-PACKAGE_CFLAGS=
+PACKAGE_CFLAGS=-Wall
PACKAGE_LIBS=-lm
win32=no
Modified: pkg/hocr/trunk/configure.in
==============================================================================
--- pkg/hocr/trunk/configure.in (original)
+++ pkg/hocr/trunk/configure.in Sat Dec 24 22:01:57 2005
@@ -2,7 +2,7 @@
AC_INIT(configure.in)
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(hocr, 0.6.4)
+AM_INIT_AUTOMAKE(hocr, 0.6.5)
AM_CONFIG_HEADER(config.h)
AM_MAINTAINER_MODE
@@ -17,7 +17,7 @@
AC_PATH_PROG(PKG_CONFIG, pkg-config, no)
-PACKAGE_CFLAGS=
+PACKAGE_CFLAGS=-Wall
PACKAGE_LIBS=-lm
win32=no
Modified: pkg/hocr/trunk/debian/changelog
==============================================================================
--- pkg/hocr/trunk/debian/changelog (original)
+++ pkg/hocr/trunk/debian/changelog Sat Dec 24 22:01:57 2005
@@ -1,8 +1,8 @@
-hocr (0.6.4-1) UNRELEASED; urgency=low
+hocr (0.6.5-1) unstable; urgency=low
- * (NOT RELEASED YET) New upstream release
+ * New upstream release
- -- Lior Kaplan <webmaster at guides.co.il> Sat, 24 Dec 2005 20:42:52 +0200
+ -- Lior Kaplan <webmaster at guides.co.il> Sat, 24 Dec 2005 23:58:50 +0200
hocr (0.4.6-1) unstable; urgency=low
Modified: pkg/hocr/trunk/examples/hocr-gtk/Makefile.am
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/Makefile.am (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/Makefile.am Sat Dec 24 22:01:57 2005
@@ -11,7 +11,11 @@
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
@PACKAGE_CFLAGS@ @gtk_CFLAGS@ @gtkspell_CFLAGS@ -I$(top_srcdir)/src
-hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ @gtkspell_LIBS@ $(INTLLIBS)
+#if WITH_WIN32
+#hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ @gtkspell_LIBS@ $(INTLLIBS)
+#else
+hocr_gtk_LDADD = $(ldadd) @PACKAGE_LIBS@ @gtk_LIBS@ @gtkspell_LIBS@ $(INTLLIBS)
+#endif
else
@@ -20,7 +24,11 @@
-DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" \
@PACKAGE_CFLAGS@ @gtk_CFLAGS@ -I$(top_srcdir)/src
-hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#if WITH_WIN32
+#hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#else
+hocr_gtk_LDADD = $(ldadd) @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#endif
endif
@@ -36,9 +44,13 @@
hocr_gtk_DEPENDENCIES = $(DEPS)
-hocr-gtk$(EXEEXT): $(hocr_gtk_OBJECTS) $(hocr_gtk_DEPENDENCIES)
- @rm -f hocr-gtk$(EXEEXT)
- $(CC) -o hocr-gtk$(EXEEXT) $(hocr_gtk_LDFLAGS) $(hocr_gtk_OBJECTS) $(hocr_gtk_LDADD) $(LIBS)
+#if WITH_WIN32
+
+#hocr-gtk$(EXEEXT): $(hocr_gtk_OBJECTS) $(hocr_gtk_DEPENDENCIES)
+# @rm -f hocr-gtk$(EXEEXT)
+# $(CC) -o hocr-gtk$(EXEEXT) $(hocr_gtk_LDFLAGS) $(hocr_gtk_OBJECTS) $(hocr_gtk_LDADD) $(LIBS)
+
+#endif
hocrdesktopdir = ${prefix}/share/applications
hocrdesktop_DATA = \
Modified: pkg/hocr/trunk/examples/hocr-gtk/Makefile.in
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/Makefile.in (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/Makefile.in Sat Dec 24 22:01:57 2005
@@ -116,12 +116,17 @@
gtkspell_LIBS = @gtkspell_LIBS@
EXTRA_DIST = hocr.desktop
+#endif
@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@INCLUDES = -DPACKAGE_DATA_DIR=\""$(datadir)"\" -DWITH_GTKSPELL -DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" @PACKAGE_CFLAGS@ @gtk_CFLAGS@ @gtkspell_CFLAGS@ -I$(top_srcdir)/src
@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@INCLUDES = -DPACKAGE_DATA_DIR=\""$(datadir)"\" -DPACKAGE_LOCALE_DIR=\""$(prefix)/$(DATADIRNAME)/locale"\" @PACKAGE_CFLAGS@ @gtk_CFLAGS@ -I$(top_srcdir)/src
-@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ @gtkspell_LIBS@ $(INTLLIBS)
-@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#if WITH_WIN32
+#hocr_gtk_LDADD = ../../src/*.o @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#else
+@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@hocr_gtk_LDADD = $(ldadd) @PACKAGE_LIBS@ @gtk_LIBS@ @gtkspell_LIBS@ $(INTLLIBS)
+@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@hocr_gtk_LDADD = $(ldadd) @PACKAGE_LIBS@ @gtk_LIBS@ $(INTLLIBS)
+#endif
@WITH_GTK_TRUE@DEPS = $(top_builddir)/src/libhocr.la
@WITH_GTK_TRUE@ldadd = $(top_builddir)/src/libhocr.la
@@ -132,6 +137,14 @@
@WITH_GTK_TRUE@hocr_gtk_DEPENDENCIES = $(DEPS)
+#if WITH_WIN32
+
+#hocr-gtk$(EXEEXT): $(hocr_gtk_OBJECTS) $(hocr_gtk_DEPENDENCIES)
+# @rm -f hocr-gtk$(EXEEXT)
+# $(CC) -o hocr-gtk$(EXEEXT) $(hocr_gtk_LDFLAGS) $(hocr_gtk_OBJECTS) $(hocr_gtk_LDADD) $(LIBS)
+
+#endif
+
@WITH_GTK_TRUE@hocrdesktopdir = ${prefix}/share/applications
@WITH_GTK_TRUE@hocrdesktop_DATA = hocr.desktop
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
@@ -148,9 +161,9 @@
@WITH_GTK_TRUE@hocr_gtk_OBJECTS = hocr-gtk.$(OBJEXT) \
@WITH_GTK_TRUE@interface.$(OBJEXT) callbacks.$(OBJEXT)
@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@hocr_gtk_DEPENDENCIES = \
-@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@../../src/*.o
+@WITH_GTK_TRUE@@WITH_GTKSPELL_FALSE@$(top_builddir)/src/libhocr.la
@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@hocr_gtk_DEPENDENCIES = \
-@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@../../src/*.o
+@WITH_GTK_TRUE@@WITH_GTKSPELL_TRUE@$(top_builddir)/src/libhocr.la
hocr_gtk_LDFLAGS =
CFLAGS = @CFLAGS@
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@@ -249,6 +262,10 @@
maintainer-clean-libtool:
+hocr-gtk$(EXEEXT): $(hocr_gtk_OBJECTS) $(hocr_gtk_DEPENDENCIES)
+ @rm -f hocr-gtk$(EXEEXT)
+ $(LINK) $(hocr_gtk_LDFLAGS) $(hocr_gtk_OBJECTS) $(hocr_gtk_LDADD) $(LIBS)
+
install-hocrdesktopDATA: $(hocrdesktop_DATA)
@$(NORMAL_INSTALL)
$(mkinstalldirs) $(DESTDIR)$(hocrdesktopdir)
@@ -394,10 +411,6 @@
mostlyclean distclean maintainer-clean
-@WITH_GTK_TRUE@hocr-gtk$(EXEEXT): $(hocr_gtk_OBJECTS) $(hocr_gtk_DEPENDENCIES)
-@WITH_GTK_TRUE@ @rm -f hocr-gtk$(EXEEXT)
-@WITH_GTK_TRUE@ $(CC) -o hocr-gtk$(EXEEXT) $(hocr_gtk_LDFLAGS) $(hocr_gtk_OBJECTS) $(hocr_gtk_LDADD) $(LIBS)
-
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
Modified: pkg/hocr/trunk/examples/hocr-gtk/callbacks.c
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/callbacks.c (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/callbacks.c Sat Dec 24 22:01:57 2005
@@ -38,16 +38,84 @@
#include "interface.h"
#include "hocr.h"
+static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
+
GdkPixbuf *pixbuf = NULL;
GdkPixbuf *vis_pixbuf = NULL;
-int
-do_ocr (GdkPixbuf * pixbuf, GtkTextBuffer * text_buffer)
+typedef struct _text_struct
{
- hocr_pixbuf *hocr_pix;
+ GtkTextBuffer *text_buffer;
hocr_text_buffer *text;
+} text_struct;
+
+gboolean
+draw_progress_bar (gpointer data)
+{
+ hocr_pixbuf *hocr_pix = (hocr_pixbuf *) data;
+
+ gtk_progress_bar_set_fraction (GTK_PROGRESS_BAR (pbar),
+ (double) hocr_pix->progress / 256.0);
+
+ return TRUE;
+}
+
+gboolean
+redraw_pixbuf (gpointer data)
+{
+ int width, height;
+ GtkTextBuffer *text_buffer = ((text_struct *) data)->text_buffer;
+ hocr_text_buffer *text = ((text_struct *) data)->text;
GtkTextIter iter;
+ height = gdk_pixbuf_get_height (vis_pixbuf);
+ width = gdk_pixbuf_get_width (vis_pixbuf);
+
+ if (pixbuf)
+ {
+ if (vis_pixbuf)
+ {
+ g_object_unref (vis_pixbuf);
+ vis_pixbuf = NULL;
+ }
+
+ vis_pixbuf = gdk_pixbuf_scale_simple (pixbuf, width,
+ height,
+ GDK_INTERP_BILINEAR);
+
+ gtk_image_set_from_pixbuf (GTK_IMAGE (image), vis_pixbuf);
+ }
+
+ /* insert the text to the text editor */
+ gtk_text_buffer_get_end_iter (text_buffer, &iter);
+
+ gtk_text_buffer_insert (text_buffer, &iter, text->text, -1);
+
+ /* unref text_buffer */
+ hocr_text_buffer_unref (text);
+
+ gtk_widget_queue_draw (textview);
+ gtk_widget_queue_draw (image);
+
+ return FALSE;
+}
+
+gpointer
+ocr_thread (gpointer data)
+{
+ static GtkTextBuffer *text_buffer;
+ static hocr_text_buffer *text;
+ static hocr_pixbuf *hocr_pix;
+ static text_struct text_struct_instance;
+
+ guint timeout_id;
+
+ /* only one run is posible */
+ if (!g_static_mutex_trylock (&mutex))
+ return data;
+
+ text_buffer = (GtkTextBuffer *) data;
+
hocr_pix = hocr_pixbuf_new (); /* get an empty hocr_pix */
if (!hocr_pix)
{
@@ -55,6 +123,14 @@
return 0;
}
+ /* create text buffer */
+ text = hocr_text_buffer_new ();
+ if (!text)
+ {
+ printf ("hocr-gtk: can\'t allocate memory for text out\n");
+ return 0;
+ }
+
/* clear text before ocr ? */
if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (clear_text)))
gtk_text_buffer_set_text (text_buffer, "", -1);
@@ -77,51 +153,62 @@
hocr_pix->command |= HOCR_COMMAND_OCR;
/* use dict ? */
- if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
- (use_dict)))
- hocr_pix->command |= HOCR_COMMAND_DICT;
+ if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (use_dict)))
+ hocr_pix->command |= HOCR_COMMAND_DICT;
/* use nikud ? */
- if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
- (use_nikud)))
- hocr_pix->command |= HOCR_COMMAND_NIKUD;
-
+ if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (use_nikud)))
+ hocr_pix->command |= HOCR_COMMAND_NIKUD;
+
/* use spaces ? */
- if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
- (use_spaces)))
- hocr_pix->command |= HOCR_COMMAND_USE_SPACE_FOR_TAB;
-
+ if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (use_spaces)))
+ hocr_pix->command |= HOCR_COMMAND_USE_SPACE_FOR_TAB;
+
/* use indentation ? */
- if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
- (use_indent)))
- hocr_pix->command |= HOCR_COMMAND_USE_INDENTATION;
-
+ if (gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (use_indent)))
+ hocr_pix->command |= HOCR_COMMAND_USE_INDENTATION;
+
hocr_pix->n_channels = gdk_pixbuf_get_n_channels (pixbuf);
hocr_pix->height = gdk_pixbuf_get_height (pixbuf);
hocr_pix->width = gdk_pixbuf_get_width (pixbuf);
hocr_pix->rowstride = gdk_pixbuf_get_rowstride (pixbuf);
hocr_pix->pixels = (unsigned char *) (gdk_pixbuf_get_pixels (pixbuf));
- /* create text buffer */
- text = hocr_text_buffer_new ();
- if (!text)
- {
- printf ("hocr-gtk: can\'t allocate memory for text out\n");
- return 0;
- }
+ /* open the ocr thread */
+ hocr_pix->progress = 0;
+ hocr_pix->progress_phase = 0;
+
+ /* set time out function to draw the progress bar */
+ timeout_id =
+ gtk_timeout_add (200, draw_progress_bar, (gpointer) hocr_pix);
+ /* call the ocr function */
hocr_do_ocr (hocr_pix, text);
- gtk_text_buffer_get_end_iter (text_buffer, &iter);
- gtk_text_buffer_insert (text_buffer, &iter, text->text, -1);
+ /* remove time out function to draw the progress bar */
+ gtk_timeout_remove (timeout_id);
+ gtk_progress_bar_set_fraction (GTK_PROGRESS_BAR (pbar), 0.0);
/* unref hocr_pixbuf */
- hocr_pix->pixels = NULL; /* do not unreff the original GTK
- * picture */
+ hocr_pix->pixels = NULL;
hocr_pixbuf_unref (hocr_pix);
- /* unref text_buffer */
- hocr_text_buffer_unref (text);
+ /* redraw the pixbuf */
+ text_struct_instance.text = text;
+ text_struct_instance.text_buffer = text_buffer;
+ gtk_timeout_add (100, redraw_pixbuf, (gpointer) & text_struct_instance);
+
+ /* unlock mutex */
+ g_static_mutex_unlock (&mutex);
+
+ return data;
+}
+
+int
+do_ocr (GdkPixbuf * pixbuf, GtkTextBuffer * text_buffer)
+{
+ /* set ocr as a thread */
+ g_thread_create (ocr_thread, (gpointer) text_buffer, FALSE, NULL);
return 1;
}
@@ -160,17 +247,26 @@
char *filename;
char title[255];
- GtkWidget *preview_frame = gtk_frame_new ("preview");
- GtkWidget *preview = gtk_image_new ();
- GtkWidget *my_file_chooser =
+ GtkWidget *preview_frame;
+ GtkWidget *preview;
+ GtkWidget *my_file_chooser;
+
+ /* only one run is posible */
+ if (!g_static_mutex_trylock (&mutex))
+ return;
+
+ preview_frame = gtk_frame_new ("preview");
+
+ preview = gtk_image_new ();
+
+ my_file_chooser =
gtk_file_chooser_dialog_new ("hocr open image",
GTK_WINDOW (window1),
GTK_FILE_CHOOSER_ACTION_OPEN,
GTK_STOCK_CANCEL,
GTK_RESPONSE_CANCEL,
GTK_STOCK_OPEN,
- GTK_RESPONSE_ACCEPT,
- NULL);
+ GTK_RESPONSE_ACCEPT, NULL);
gtk_widget_show (preview);
gtk_container_add (GTK_CONTAINER (preview_frame), preview);
@@ -206,6 +302,11 @@
}
gtk_widget_destroy (my_file_chooser);
+
+ /* unlock mutex */
+ g_static_mutex_unlock (&mutex);
+
+ return;
}
int
@@ -426,6 +527,8 @@
on_window1_delete_event (GtkWidget * widget,
GdkEvent * event, gpointer user_data)
{
+ set_rc_file ();
+
if (pixbuf)
{
g_object_unref (pixbuf);
@@ -438,6 +541,8 @@
vis_pixbuf = NULL;
}
+ g_free (font_name);
+
gtk_main_quit ();
return FALSE;
}
@@ -445,6 +550,8 @@
void
on_toolbutton_quit_clicked (GtkToolButton * toolbutton, gpointer user_data)
{
+ set_rc_file ();
+
if (pixbuf)
{
g_object_unref (pixbuf);
@@ -507,11 +614,10 @@
/* get the new font name */
g_free (font_name);
font_name = g_strdup (gtk_font_selection_dialog_get_font_name
- (GTK_FONT_SELECTION_DIALOG (fsd)));
-
+ (GTK_FONT_SELECTION_DIALOG (fsd)));
+
/* Change default font throughout the text widget */
- font_desc = pango_font_description_from_string
- (font_name);
+ font_desc = pango_font_description_from_string (font_name);
gtk_widget_modify_font (textview, font_desc);
pango_font_description_free (font_desc);
@@ -557,3 +663,207 @@
{
on_toolbutton_about_clicked (NULL, NULL);
}
+
+void
+set_rc_file ()
+{
+ GKeyFile *key_file;
+ gchar *pathname = NULL;
+ GError *error = NULL;
+ gchar *content;
+
+ /* get menu items */
+
+ /* color boxes ? */
+ color_text_box_arg = gtk_check_menu_item_get_active
+ (GTK_CHECK_MENU_ITEM (color_text_box));
+
+ /* color misread fonts ? */
+ color_misread_arg = gtk_check_menu_item_get_active
+ (GTK_CHECK_MENU_ITEM (color_misread));
+
+ /* do ocr ? */
+ ocr_arg = gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (ocr));
+
+ /* use dict ? */
+ use_dict_arg =
+ gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM (use_dict));
+
+ /* use nikud ? */
+ use_nikud_arg =
+ gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
+ (use_nikud));
+
+ /* use spaces ? */
+ use_spaces_arg =
+ gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
+ (use_spaces));
+
+ /* use indentation ? */
+ use_indent_arg =
+ gtk_check_menu_item_get_active (GTK_CHECK_MENU_ITEM
+ (use_indent));
+
+ /* get path */
+ pathname = g_strdup_printf ("%s%s%s", g_get_home_dir (),
+ G_DIR_SEPARATOR_S, ".hocr-gtk.rc");
+
+ /* create new key file data */
+ key_file = g_key_file_new ();
+
+ /* color boxes ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "color_text_box", color_text_box_arg);
+
+ /* color misread fonts ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "color_misread_arg", color_misread_arg);
+
+ /* do ocr ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk", "ocr_arg", ocr_arg);
+
+ /* use dict ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "use_dict_arg", use_dict_arg);
+
+ /* use nikud ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "use_nikud_arg", use_nikud_arg);
+
+ /* use spaces ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "use_spaces_arg", use_spaces_arg);
+
+ /* use indentation ? */
+ g_key_file_set_boolean (key_file, "hocr-gtk",
+ "use_indent_arg", use_indent_arg);
+
+ /* font name */
+ g_key_file_set_string (key_file, "hocr-gtk", "font_name", font_name);
+
+ /* save data */
+ content = g_key_file_to_data (key_file, NULL, NULL);
+ g_file_set_contents (pathname, content, -1, &error);
+
+ g_key_file_free (key_file);
+ g_free (content);
+ g_free (pathname);
+}
+
+void
+get_rc_file ()
+{
+ gchar *pathname = NULL;
+ GError *error = NULL;
+ PangoFontDescription *font_desc;
+
+ /* get path */
+ pathname = g_strdup_printf ("%s%s%s", g_get_home_dir (),
+ G_DIR_SEPARATOR_S, ".hocr-gtk.rc");
+
+ /* is file exist ? */
+ if (g_file_test (pathname, G_FILE_TEST_EXISTS))
+ {
+ GKeyFile *key_file;
+
+ /* create new key file data */
+ key_file = g_key_file_new ();
+
+ if (g_key_file_load_from_file (key_file, pathname, 0, &error))
+ {
+ /* color boxes ? */
+ color_text_box_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "color_text_box",
+ &error);
+
+ /* color misread fonts ? */
+ color_misread_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "color_misread_arg",
+ &error);
+
+ /* do ocr ? */
+ ocr_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "ocr_arg", &error);
+
+ /* use dict ? */
+ use_dict_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "use_dict_arg", &error);
+
+ /* use nikud ? */
+ use_nikud_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "use_nikud_arg",
+ &error);
+
+ /* use spaces ? */
+ use_spaces_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "use_spaces_arg",
+ &error);
+
+ /* use indentation ? */
+ use_indent_arg =
+ g_key_file_get_boolean (key_file, "hocr-gtk",
+ "use_indent_arg",
+ &error);
+
+ /* font name */
+ if (font_name)
+ g_free (font_name);
+
+ font_name = g_key_file_get_string (key_file, "hocr-gtk",
+ "font_name", &error);
+
+ }
+
+ g_key_file_free (key_file);
+ }
+ else
+ {
+ /* if no file create one */
+ set_rc_file ();
+ }
+
+ /* set menu items */
+
+ /* color boxes ? */
+ gtk_check_menu_item_set_active
+ (GTK_CHECK_MENU_ITEM (color_text_box), color_text_box_arg);
+
+ /* color misread fonts ? */
+ gtk_check_menu_item_set_active
+ (GTK_CHECK_MENU_ITEM (color_misread), color_misread_arg);
+
+ /* do ocr ? */
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (ocr), ocr_arg);
+
+ /* use dict ? */
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_dict),
+ use_dict_arg);
+
+ /* use nikud ? */
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_nikud),
+ use_nikud_arg);
+
+ /* use spaces ? */
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_spaces),
+ use_spaces_arg);
+
+ /* use indentation ? */
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_indent),
+ use_indent_arg);
+
+ /* Change default font throughout the text widget */
+ font_desc = pango_font_description_from_string (font_name);
+
+ gtk_widget_modify_font (textview, font_desc);
+ pango_font_description_free (font_desc);
+
+ g_free (pathname);
+
+ return;
+}
Modified: pkg/hocr/trunk/examples/hocr-gtk/callbacks.h
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/callbacks.h (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/callbacks.h Sat Dec 24 22:01:57 2005
@@ -81,3 +81,7 @@
void on_normal_size_activate (GtkMenuItem * menuitem, gpointer user_data);
void on_about_activate (GtkMenuItem * menuitem, gpointer user_data);
+
+void get_rc_file ();
+
+void set_rc_file ();
Modified: pkg/hocr/trunk/examples/hocr-gtk/interface.c
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/interface.c (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/interface.c Sat Dec 24 22:01:57 2005
@@ -109,17 +109,12 @@
GtkWidget *toolitem3;
GtkWidget *vseparator3;
GtkWidget *toolbutton_about;
- GtkWidget *toolitem4;
- GtkWidget *vseparator4;
- GtkWidget *toolbutton_quit;
/* image */
GtkWidget *vpaned1;
GtkWidget *scrolledwindow_image;
/* text */
- GtkWidget *viewport1;
- GtkWidget *scrolledwindow_text;
GtkTooltips *tooltips;
PangoFontDescription *font_desc;
@@ -236,7 +231,8 @@
gtk_widget_show (use_dict);
gtk_container_add (GTK_CONTAINER (menuitem2_menu), use_dict);
gtk_tooltips_set_tip (tooltips, use_dict,
- _("Try to guess unrecognized fonts in scaned text using internal dictionary"),
+ _
+ ("Try to guess unrecognized fonts in scaned text using internal dictionary"),
NULL);
gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_dict), FALSE);
@@ -244,26 +240,26 @@
gtk_widget_show (use_nikud);
gtk_container_add (GTK_CONTAINER (menuitem2_menu), use_nikud);
gtk_tooltips_set_tip (tooltips, use_nikud,
- _("Try to guess nikud"),
- NULL);
+ _("Try to guess nikud"), NULL);
gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_nikud), TRUE);
use_spaces = gtk_check_menu_item_new_with_mnemonic (_("Use spaces"));
gtk_widget_show (use_spaces);
gtk_container_add (GTK_CONTAINER (menuitem2_menu), use_spaces);
gtk_tooltips_set_tip (tooltips, use_spaces,
- _("Use spaces for tabs"),
- NULL);
- gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_spaces), FALSE);
+ _("Use spaces for tabs"), NULL);
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_spaces),
+ FALSE);
- use_indent = gtk_check_menu_item_new_with_mnemonic (_("Use indentation"));
+ use_indent =
+ gtk_check_menu_item_new_with_mnemonic (_("Use indentation"));
gtk_widget_show (use_indent);
gtk_container_add (GTK_CONTAINER (menuitem2_menu), use_indent);
gtk_tooltips_set_tip (tooltips, use_indent,
- _("Try to guess line indentation"),
- NULL);
- gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_indent), FALSE);
-
+ _("Try to guess line indentation"), NULL);
+ gtk_check_menu_item_set_active (GTK_CHECK_MENU_ITEM (use_indent),
+ FALSE);
+
menuitem3 = gtk_menu_item_new_with_mnemonic (_("_View"));
gtk_widget_show (menuitem3);
gtk_container_add (GTK_CONTAINER (menubar1), menuitem3);
@@ -380,21 +376,7 @@
gtk_container_add (GTK_CONTAINER (toolbar), toolbutton_about);
gtk_tool_item_set_tooltip (GTK_TOOL_ITEM (toolbutton_about), tooltips,
_("About this application"), NULL);
-/*
- toolitem4 = (GtkWidget *) gtk_tool_item_new ();
- gtk_widget_show (toolitem4);
- gtk_container_add (GTK_CONTAINER (toolbar), toolitem4);
- vseparator4 = gtk_vseparator_new ();
- gtk_widget_show (vseparator4);
- gtk_container_add (GTK_CONTAINER (toolitem4), vseparator4);
-
- toolbutton_quit =
- (GtkWidget *) gtk_tool_button_new_from_stock ("gtk-quit");
- gtk_widget_show (toolbutton_quit);
- gtk_container_add (GTK_CONTAINER (toolbar), toolbutton_quit);
- gtk_tool_item_set_tooltip (GTK_TOOL_ITEM (toolbutton_quit), tooltips,
- _("Quit this program"), NULL);
-*/
+
/* image */
vpaned1 = gtk_vpaned_new ();
gtk_widget_show (vpaned1);
@@ -431,12 +413,25 @@
textview = gtk_text_view_new ();
gtk_widget_show (textview);
gtk_container_add (GTK_CONTAINER (scrolledwindow_text), textview);
- font_desc = pango_font_description_from_string
- (font_name);
+ font_desc = pango_font_description_from_string (font_name);
gtk_widget_modify_font (textview, font_desc);
font_name = g_strdup (TEXT_FONT_NAME);
+ /* progress bar */
+ hbox2 = gtk_hbox_new (FALSE, 0);
+ gtk_widget_show (hbox2);
+ gtk_box_pack_start (GTK_BOX (vbox1), hbox2, FALSE, FALSE, 0);
+
+ pbar = gtk_progress_bar_new ();
+ gtk_widget_show (pbar);
+ gtk_box_pack_start (GTK_BOX (hbox2), pbar, FALSE, FALSE, 0);
+ gtk_widget_set_size_request (pbar, 100, -1);
+
+ statusbar1 = gtk_statusbar_new ();
+ gtk_widget_show (statusbar1);
+ gtk_box_pack_start (GTK_BOX (hbox2), statusbar1, TRUE, TRUE, 0);
+
/* main window */
g_signal_connect ((gpointer) window1, "delete_event",
G_CALLBACK (on_window1_delete_event), NULL);
@@ -463,11 +458,9 @@
G_CALLBACK (on_toolbutton_spell_clicked), NULL);
#endif
-/*
- g_signal_connect ((gpointer) toolbutton_quit, "clicked",
- G_CALLBACK (on_toolbutton_quit_clicked), NULL);
- */
-
+ /* readf rc file */
+ get_rc_file();
+
/* menu */
g_signal_connect ((gpointer) open, "activate",
G_CALLBACK (on_open_activate), NULL);
Modified: pkg/hocr/trunk/examples/hocr-gtk/interface.h
==============================================================================
--- pkg/hocr/trunk/examples/hocr-gtk/interface.h (original)
+++ pkg/hocr/trunk/examples/hocr-gtk/interface.h Sat Dec 24 22:01:57 2005
@@ -53,6 +53,9 @@
/* main window */
GtkWidget *window1;
+GtkWidget *pbar;
+GtkWidget *hbox2;
+GtkWidget *statusbar1;
/* menu check boxs */
GtkWidget *color_text_box;
@@ -66,10 +69,21 @@
/* image and text of main window */
GtkWidget *image;
+GtkWidget *viewport1;
+GtkWidget *scrolledwindow_text;
GtkWidget *textview;
/* user font */
-char * font_name;
+char *font_name;
+
+/* user pref */
+gboolean color_text_box_arg;
+gboolean color_misread_arg;
+gboolean ocr_arg;
+gboolean use_dict_arg;
+gboolean use_nikud_arg;
+gboolean use_indent_arg;
+gboolean use_spaces_arg;
/* user interface for window */
GtkWidget *create_window1 (void);
Modified: pkg/hocr/trunk/examples/hocr/Makefile.am
==============================================================================
--- pkg/hocr/trunk/examples/hocr/Makefile.am (original)
+++ pkg/hocr/trunk/examples/hocr/Makefile.am Sat Dec 24 22:01:57 2005
@@ -17,12 +17,21 @@
hocr_SOURCES = \
hocr-cmd.c
+#if WITH_WIN32
+
+#hocr_DEPENDENCIES = $(DEPS)
+#hocr_LDADD = ../../src/*.o $(INTLLIBS)
+
+#hocr$(EXEEXT): $(hocr_OBJECTS) $(hocr_DEPENDENCIES)
+# @rm -f hocr$(EXEEXT)
+# $(CC) -o hocr$(EXEEXT) @PACKAGE_LIBS@ $(hocr_LDFLAGS) $(hocr_OBJECTS) $(hocr_LDADD) $(LIBS)
+
+#else
+
hocr_DEPENDENCIES = $(DEPS)
-hocr_LDADD = ../../src/*.o $(INTLLIBS)
+hocr_LDADD = $(ldadd) $(INTLLIBS)
-hocr$(EXEEXT): $(hocr_OBJECTS) $(hocr_DEPENDENCIES)
- @rm -f hocr$(EXEEXT)
- $(CC) -o hocr$(EXEEXT) @PACKAGE_LIBS@ $(hocr_LDFLAGS) $(hocr_OBJECTS) $(hocr_LDADD) $(LIBS)
+#endif
man_MANS = hocr.1
Modified: pkg/hocr/trunk/examples/hocr/Makefile.in
==============================================================================
--- pkg/hocr/trunk/examples/hocr/Makefile.in (original)
+++ pkg/hocr/trunk/examples/hocr/Makefile.in Sat Dec 24 22:01:57 2005
@@ -126,8 +126,21 @@
@WITH_CMD_TRUE@hocr_SOURCES = hocr-cmd.c
+#if WITH_WIN32
+
+#hocr_DEPENDENCIES = $(DEPS)
+#hocr_LDADD = ../../src/*.o $(INTLLIBS)
+
+#hocr$(EXEEXT): $(hocr_OBJECTS) $(hocr_DEPENDENCIES)
+# @rm -f hocr$(EXEEXT)
+# $(CC) -o hocr$(EXEEXT) @PACKAGE_LIBS@ $(hocr_LDFLAGS) $(hocr_OBJECTS) $(hocr_LDADD) $(LIBS)
+
+#else
+
@WITH_CMD_TRUE@hocr_DEPENDENCIES = $(DEPS)
-@WITH_CMD_TRUE@hocr_LDADD = ../../src/*.o $(INTLLIBS)
+@WITH_CMD_TRUE@hocr_LDADD = $(ldadd) $(INTLLIBS)
+
+#endif
@WITH_CMD_TRUE@man_MANS = hocr.1
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
@@ -242,6 +255,10 @@
maintainer-clean-libtool:
+hocr$(EXEEXT): $(hocr_OBJECTS) $(hocr_DEPENDENCIES)
+ @rm -f hocr$(EXEEXT)
+ $(LINK) $(hocr_LDFLAGS) $(hocr_OBJECTS) $(hocr_LDADD) $(LIBS)
+
install-man1:
$(mkinstalldirs) $(DESTDIR)$(man1dir)
@list='$(man1_MANS)'; \
@@ -405,10 +422,6 @@
mostlyclean distclean maintainer-clean
-@WITH_CMD_TRUE@hocr$(EXEEXT): $(hocr_OBJECTS) $(hocr_DEPENDENCIES)
-@WITH_CMD_TRUE@ @rm -f hocr$(EXEEXT)
-@WITH_CMD_TRUE@ $(CC) -o hocr$(EXEEXT) @PACKAGE_LIBS@ $(hocr_LDFLAGS) $(hocr_OBJECTS) $(hocr_LDADD) $(LIBS)
-
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
Modified: pkg/hocr/trunk/libhocr.pc
==============================================================================
--- pkg/hocr/trunk/libhocr.pc (original)
+++ pkg/hocr/trunk/libhocr.pc Sat Dec 24 22:01:57 2005
@@ -5,6 +5,6 @@
Name: libhocr
Description: A c library for hebrew optical character recognition
-Version: 0.6.4
+Version: 0.6.5
Libs: -L${libdir} -lm -lhocr
Cflags: -I${includedir}
Modified: pkg/hocr/trunk/po/he.gmo
==============================================================================
Binary files. No diff available.
Modified: pkg/hocr/trunk/po/he.po
==============================================================================
--- pkg/hocr/trunk/po/he.po (original)
+++ pkg/hocr/trunk/po/he.po Sat Dec 24 22:01:57 2005
@@ -8,7 +8,7 @@
msgstr ""
"Project-Id-Version: hocr 0.4.0\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2005-12-21 11:56+0200\n"
+"POT-Creation-Date: 2005-12-24 23:20+0200\n"
"PO-Revision-Date: 2005-08-17 02:49+0300\n"
"Last-Translator: kzamir <kzamir at walla.co.il>\n"
"Language-Team: Hebrew <LL at li.org>\n"
@@ -16,135 +16,135 @@
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
-#: examples/hocr-gtk/interface.c:133
+#: examples/hocr-gtk/interface.c:128
msgid "hocr-gui"
msgstr ""
-#: examples/hocr-gtk/interface.c:145
+#: examples/hocr-gtk/interface.c:140
msgid "_File"
msgstr "קובץ"
-#: examples/hocr-gtk/interface.c:173
+#: examples/hocr-gtk/interface.c:168
msgid "_Edit"
msgstr "עריכה"
-#: examples/hocr-gtk/interface.c:204
+#: examples/hocr-gtk/interface.c:199
msgid "Color boxes"
msgstr "צבע מסגרות"
-#: examples/hocr-gtk/interface.c:207
+#: examples/hocr-gtk/interface.c:202
msgid "Color text boxes"
msgstr "צבע מסגרות של אזורי טקסט"
-#: examples/hocr-gtk/interface.c:213
+#: examples/hocr-gtk/interface.c:208
msgid "Color misread"
msgstr "צבע אותיות"
-#: examples/hocr-gtk/interface.c:216
+#: examples/hocr-gtk/interface.c:211
msgid "Color misread fonts"
msgstr "צבע אותיות שלא פענחו"
-#: examples/hocr-gtk/interface.c:221
+#: examples/hocr-gtk/interface.c:216
msgid "Clear"
msgstr "נקה"
-#: examples/hocr-gtk/interface.c:225
+#: examples/hocr-gtk/interface.c:220
msgid "Clear text each time you ocr new scan"
msgstr "נקה את אזור הטקסט לפני פענוח טקסט חדש"
-#: examples/hocr-gtk/interface.c:228
+#: examples/hocr-gtk/interface.c:223
msgid "Ocr"
msgstr ""
-#: examples/hocr-gtk/interface.c:232
+#: examples/hocr-gtk/interface.c:227
msgid "Try to recognize fonts in scaned text"
msgstr "נסה לפענח אותיות"
-#: examples/hocr-gtk/interface.c:235
+#: examples/hocr-gtk/interface.c:230
msgid "Use dictionary"
msgstr "מילון"
-#: examples/hocr-gtk/interface.c:239
+#: examples/hocr-gtk/interface.c:235
#, fuzzy
msgid ""
"Try to guess unrecognized fonts in scaned text using internal dictionary"
msgstr "נסה לתקן מילים שגויות בעזרת מילון פנימי"
-#: examples/hocr-gtk/interface.c:243
+#: examples/hocr-gtk/interface.c:239
msgid "Use nikud"
msgstr "ניקוד"
-#: examples/hocr-gtk/interface.c:247
+#: examples/hocr-gtk/interface.c:243
msgid "Try to guess nikud"
msgstr "נסה לנחש את הניקוד של האות המזוהה"
-#: examples/hocr-gtk/interface.c:251
+#: examples/hocr-gtk/interface.c:246
msgid "Use spaces"
msgstr "רווח"
-#: examples/hocr-gtk/interface.c:255
+#: examples/hocr-gtk/interface.c:250
msgid "Use spaces for tabs"
msgstr "השתמש ברווחים במקום סימני טאב"
-#: examples/hocr-gtk/interface.c:259
+#: examples/hocr-gtk/interface.c:255
msgid "Use indentation"
msgstr "עימוד"
-#: examples/hocr-gtk/interface.c:263
+#: examples/hocr-gtk/interface.c:259
msgid "Try to guess line indentation"
msgstr "נסה לנחש את העימוד של השורה המזוהה"
-#: examples/hocr-gtk/interface.c:267
+#: examples/hocr-gtk/interface.c:263
msgid "_View"
msgstr "מראה"
-#: examples/hocr-gtk/interface.c:291
+#: examples/hocr-gtk/interface.c:287
msgid "_Help"
msgstr "עזרה"
-#: examples/hocr-gtk/interface.c:298
+#: examples/hocr-gtk/interface.c:294
msgid "_About"
msgstr "אודות"
-#: examples/hocr-gtk/interface.c:315
+#: examples/hocr-gtk/interface.c:311
msgid "Open a new picture for the OCR"
msgstr "פתח תמונה חדשה לצורך פענוח"
-#: examples/hocr-gtk/interface.c:322
+#: examples/hocr-gtk/interface.c:318
msgid "Convert picture to text"
msgstr "נסה לפענח תמונה"
-#: examples/hocr-gtk/interface.c:329
+#: examples/hocr-gtk/interface.c:325
msgid "Save the text created by the OCR"
msgstr "שמור את הטקסט המפוענח"
-#: examples/hocr-gtk/interface.c:345
+#: examples/hocr-gtk/interface.c:341
msgid "Spell check the text"
msgstr "בדוק איות"
-#: examples/hocr-gtk/interface.c:382
+#: examples/hocr-gtk/interface.c:378
msgid "About this application"
msgstr "אודות ישום זה"
#. set the window title
-#: examples/hocr-gtk/callbacks.c:200
+#: examples/hocr-gtk/callbacks.c:296
msgid "hocr-gtk"
msgstr ""
-#: examples/hocr-gtk/callbacks.c:306
+#: examples/hocr-gtk/callbacks.c:407
msgid "HOCR-GTK"
msgstr ""
-#: examples/hocr-gtk/callbacks.c:312
+#: examples/hocr-gtk/callbacks.c:413
msgid "HOCR-GTK - Hebrew character recognition software"
msgstr "HOCR-GTK - מזהה אותיות אופטי עברי"
-#: examples/hocr-gtk/callbacks.c:314
+#: examples/hocr-gtk/callbacks.c:415
msgid "translator_credits"
msgstr "יעקב זמיר"
#. create font dialog
-#: examples/hocr-gtk/callbacks.c:498
+#: examples/hocr-gtk/callbacks.c:605
msgid "Set text font"
msgstr "קבע את אותיות אזור הטקסט"
Modified: pkg/hocr/trunk/po/hocr-gtk.pot
==============================================================================
--- pkg/hocr/trunk/po/hocr-gtk.pot (original)
+++ pkg/hocr/trunk/po/hocr-gtk.pot Sat Dec 24 22:01:57 2005
@@ -8,7 +8,7 @@
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2005-12-21 11:56+0200\n"
+"POT-Creation-Date: 2005-12-24 23:20+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL at ADDRESS>\n"
"Language-Team: LANGUAGE <LL at li.org>\n"
@@ -16,133 +16,133 @@
"Content-Type: text/plain; charset=CHARSET\n"
"Content-Transfer-Encoding: 8bit\n"
-#: examples/hocr-gtk/interface.c:133
+#: examples/hocr-gtk/interface.c:128
msgid "hocr-gui"
msgstr ""
-#: examples/hocr-gtk/interface.c:145
+#: examples/hocr-gtk/interface.c:140
msgid "_File"
msgstr ""
-#: examples/hocr-gtk/interface.c:173
+#: examples/hocr-gtk/interface.c:168
msgid "_Edit"
msgstr ""
-#: examples/hocr-gtk/interface.c:204
+#: examples/hocr-gtk/interface.c:199
msgid "Color boxes"
msgstr ""
-#: examples/hocr-gtk/interface.c:207
+#: examples/hocr-gtk/interface.c:202
msgid "Color text boxes"
msgstr ""
-#: examples/hocr-gtk/interface.c:213
+#: examples/hocr-gtk/interface.c:208
msgid "Color misread"
msgstr ""
-#: examples/hocr-gtk/interface.c:216
+#: examples/hocr-gtk/interface.c:211
msgid "Color misread fonts"
msgstr ""
-#: examples/hocr-gtk/interface.c:221
+#: examples/hocr-gtk/interface.c:216
msgid "Clear"
msgstr ""
-#: examples/hocr-gtk/interface.c:225
+#: examples/hocr-gtk/interface.c:220
msgid "Clear text each time you ocr new scan"
msgstr ""
-#: examples/hocr-gtk/interface.c:228
+#: examples/hocr-gtk/interface.c:223
msgid "Ocr"
msgstr ""
-#: examples/hocr-gtk/interface.c:232
+#: examples/hocr-gtk/interface.c:227
msgid "Try to recognize fonts in scaned text"
msgstr ""
-#: examples/hocr-gtk/interface.c:235
+#: examples/hocr-gtk/interface.c:230
msgid "Use dictionary"
msgstr ""
-#: examples/hocr-gtk/interface.c:239
+#: examples/hocr-gtk/interface.c:235
msgid ""
"Try to guess unrecognized fonts in scaned text using internal dictionary"
msgstr ""
-#: examples/hocr-gtk/interface.c:243
+#: examples/hocr-gtk/interface.c:239
msgid "Use nikud"
msgstr ""
-#: examples/hocr-gtk/interface.c:247
+#: examples/hocr-gtk/interface.c:243
msgid "Try to guess nikud"
msgstr ""
-#: examples/hocr-gtk/interface.c:251
+#: examples/hocr-gtk/interface.c:246
msgid "Use spaces"
msgstr ""
-#: examples/hocr-gtk/interface.c:255
+#: examples/hocr-gtk/interface.c:250
msgid "Use spaces for tabs"
msgstr ""
-#: examples/hocr-gtk/interface.c:259
+#: examples/hocr-gtk/interface.c:255
msgid "Use indentation"
msgstr ""
-#: examples/hocr-gtk/interface.c:263
+#: examples/hocr-gtk/interface.c:259
msgid "Try to guess line indentation"
msgstr ""
-#: examples/hocr-gtk/interface.c:267
+#: examples/hocr-gtk/interface.c:263
msgid "_View"
msgstr ""
-#: examples/hocr-gtk/interface.c:291
+#: examples/hocr-gtk/interface.c:287
msgid "_Help"
msgstr ""
-#: examples/hocr-gtk/interface.c:298
+#: examples/hocr-gtk/interface.c:294
msgid "_About"
msgstr ""
-#: examples/hocr-gtk/interface.c:315
+#: examples/hocr-gtk/interface.c:311
msgid "Open a new picture for the OCR"
msgstr ""
-#: examples/hocr-gtk/interface.c:322
+#: examples/hocr-gtk/interface.c:318
msgid "Convert picture to text"
msgstr ""
-#: examples/hocr-gtk/interface.c:329
+#: examples/hocr-gtk/interface.c:325
msgid "Save the text created by the OCR"
msgstr ""
-#: examples/hocr-gtk/interface.c:345
+#: examples/hocr-gtk/interface.c:341
msgid "Spell check the text"
msgstr ""
-#: examples/hocr-gtk/interface.c:382
+#: examples/hocr-gtk/interface.c:378
msgid "About this application"
msgstr ""
#. set the window title
-#: examples/hocr-gtk/callbacks.c:200
+#: examples/hocr-gtk/callbacks.c:296
msgid "hocr-gtk"
msgstr ""
-#: examples/hocr-gtk/callbacks.c:306
+#: examples/hocr-gtk/callbacks.c:407
msgid "HOCR-GTK"
msgstr ""
-#: examples/hocr-gtk/callbacks.c:312
+#: examples/hocr-gtk/callbacks.c:413
msgid "HOCR-GTK - Hebrew character recognition software"
msgstr ""
-#: examples/hocr-gtk/callbacks.c:314
+#: examples/hocr-gtk/callbacks.c:415
msgid "translator_credits"
msgstr ""
#. create font dialog
-#: examples/hocr-gtk/callbacks.c:498
+#: examples/hocr-gtk/callbacks.c:605
msgid "Set text font"
msgstr ""
Modified: pkg/hocr/trunk/src/Makefile.am
==============================================================================
--- pkg/hocr/trunk/src/Makefile.am (original)
+++ pkg/hocr/trunk/src/Makefile.am Sat Dec 24 22:01:57 2005
@@ -1,8 +1,8 @@
## Process this file with automake to produce Makefile.in
-libhocr_la_CFLAGS =\
- -Wall\
- -g
+INCLUDES = @PACKAGE_CFLAGS@ -g -Wall
+
+libhocr_la_CFLAGS = @PACKAGE_CFLAGS@ -g -Wall
lib_LTLIBRARIES = libhocr.la
@@ -18,6 +18,6 @@
libhocr_la_LDFLAGS = -version-info $(VERSION_INFO)
-libhocr_la_LIBADD =
+libhocr_la_LIBADD = @PACKAGE_LIBS@
include_HEADERS = hocr.h hocr_pixbuf.h hocr_object.h hocr_textbuffer.h
Modified: pkg/hocr/trunk/src/Makefile.in
==============================================================================
--- pkg/hocr/trunk/src/Makefile.in (original)
+++ pkg/hocr/trunk/src/Makefile.in Sat Dec 24 22:01:57 2005
@@ -115,8 +115,9 @@
gtkspell_CFLAGS = @gtkspell_CFLAGS@
gtkspell_LIBS = @gtkspell_LIBS@
-libhocr_la_CFLAGS = -Wall -g
+INCLUDES = @PACKAGE_CFLAGS@ -g -Wall
+libhocr_la_CFLAGS = @PACKAGE_CFLAGS@ -g -Wall
lib_LTLIBRARIES = libhocr.la
@@ -125,7 +126,7 @@
libhocr_la_LDFLAGS = -version-info $(VERSION_INFO)
-libhocr_la_LIBADD =
+libhocr_la_LIBADD = @PACKAGE_LIBS@
include_HEADERS = hocr.h hocr_pixbuf.h hocr_object.h hocr_textbuffer.h
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
Modified: pkg/hocr/trunk/src/font_layout.c
==============================================================================
--- pkg/hocr/trunk/src/font_layout.c (original)
+++ pkg/hocr/trunk/src/font_layout.c Sat Dec 24 22:01:57 2005
@@ -206,7 +206,8 @@
int i;
int start_counter, end_counter;
int x_start, x_end;
- int y_start, y_end;
+ int y_start_down, y_end_down;
+ int y_start_up, y_end_up;
int font_hight;
/* skip first two letters, they may be line numbering */
@@ -224,8 +225,10 @@
}
/* avg over NUM_OF_FONTS_TO_AVG fonts */
- y_start = 0;
- y_end = 0;
+ y_start_up = 0;
+ y_end_up = 0;
+ y_start_down = 0;
+ y_end_down = 0;
x_start = 0;
x_end = 0;
font_hight = 0;
@@ -237,14 +240,15 @@
while (i < num_of_fonts)
{
if (fonts[i].hight <
- ((1000 + FONT_ASSEND) * avg_font_hight / 1000)
+ ((1000 + 1.7 * FONT_ASSEND) * avg_font_hight / 1000)
&& fonts[i].hight >
- ((1000 - FONT_ASSEND) * avg_font_hight / 1000))
+ ((1000 - 1.5 * FONT_ASSEND) * avg_font_hight / 1000))
{
/* take only first NUM_OF_FONTS_TO_AVG to avg */
if (start_counter < NUM_OF_FONTS_TO_AVG)
{
- y_start += fonts[i].y1;
+ y_start_up += fonts[i].y1;
+ y_start_down += fonts[i].y2;
x_start += fonts[i].x2;
font_hight += fonts[i].hight;
start_counter++;
@@ -259,14 +263,15 @@
{
i--;
if (fonts[i].hight <
- ((1000 + FONT_ASSEND) * avg_font_hight / 1000)
+ ((1000 + 1.7 * FONT_ASSEND) * avg_font_hight / 1000)
&& fonts[i].hight >
- ((1000 - FONT_ASSEND) * avg_font_hight / 1000))
+ ((1000 - 1.5 * FONT_ASSEND) * avg_font_hight / 1000))
{
/* take only last NUM_OF_FONTS_TO_AVG to avg */
if (end_counter < NUM_OF_FONTS_TO_AVG)
{
- y_end += fonts[i].y1;
+ y_end_up += fonts[i].y1;
+ y_end_down += fonts[i].y2;
x_end += fonts[i].x1;
font_hight += fonts[i].hight;
end_counter++;
@@ -286,9 +291,11 @@
}
/* if here then counter is not zero, i can avarage */
- y_start /= start_counter;
+ y_start_up /= start_counter;
+ y_start_down /= start_counter;
x_start /= start_counter;
- y_end /= end_counter;
+ y_end_up /= end_counter;
+ y_end_down /= end_counter;
x_end /= end_counter;
font_hight /= (end_counter + start_counter);
@@ -304,13 +311,11 @@
}
/* make line equation (x_end - x_start) is not zero */
- top_line->a = (double) (y_end - y_start) / (double) (x_end - x_start);
-
- /* FIXME: assume line is horizonatal and parallel ? */
- base_line->a = top_line->a;
-
- top_line->b = y_start - top_line->a * x_start;
- base_line->b = top_line->b + font_hight + 1;
+ top_line->a = (double) (y_end_up - y_start_up) / (double) (x_end - x_start);
+ base_line->a = (double) (y_end_down - y_start_down) / (double) (x_end - x_start);
+
+ top_line->b = y_start_up - top_line->a * x_start;
+ base_line->b = y_start_down - base_line->a * x_start + 1;
return 0;
}
Modified: pkg/hocr/trunk/src/font_recognition.c
==============================================================================
--- pkg/hocr/trunk/src/font_recognition.c (original)
+++ pkg/hocr/trunk/src/font_recognition.c Sat Dec 24 22:01:57 2005
@@ -49,8 +49,8 @@
sum = 0;
/* check a 6*6 triangle */
- for (x = font.x2; x > (font.x2 - 2); x--)
- for (y = font.y2; y > (font.y2 - (x - (font.x1 - 2))); y--)
+ for (x = font.x2; x > (font.x2 - 6); x--)
+ for (y = font.y2; y > (font.y2 - (x - (font.x2 - 6))); y--)
{
sum += (hocr_pixbuf_get_object (pix, x, y) ==
obj) ? 1 : 0;
@@ -449,7 +449,8 @@
for (x = x2; x > x1; x--)
{
sum = 0;
- y = y2 - 3;
+ y = y2;
+
while (y > y1 && sum < 2)
{
sum = ((hocr_pixbuf_get_object (pix, x, y) ==
@@ -459,11 +460,11 @@
y--;
}
- if (max > (y2 - y + 1))
+ if (max > (y2 - y))
{
return 1;
-
}
+
if (max < (y2 - y))
max = (y2 - y);
}
@@ -993,7 +994,6 @@
{
int end, start, end2, start2;
int number_of_bars;
- int start_of_right_bar, end_of_right_bar;
/* helps if patach is atached */
font.y2 += 3;
@@ -1214,8 +1214,13 @@
int x_end_of_top_bar;
int x_start_of_top_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
/* chek if this is realy is a thin font */
- if (font.width && ((double) font.hight / (double) font.width) < 1.9)
+ if (font.width && ((double) font.hight / (double) font.width) < 1.5)
return 0;
number_of_bars =
@@ -1231,7 +1236,16 @@
obj);
if (number_of_bars != 1)
- return 0;
+ {
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 3,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+ }
/* this is not top bar */
if (end_of_top_bar > (font.y1 + font.hight / 2))
@@ -1298,7 +1312,10 @@
int number_of_bars;
int start, end;
- /* start of font */
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
number_of_bars =
count_vertical_bars (pix, font, font.y1 + 2 * font.hight / 3,
@@ -1448,11 +1465,12 @@
int start, end, start2, end2;
/* helps if patach is atached */
- font.y2 += 6;
- font.hight += 6;
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
/* kaf is wide font */
- if (font.width && (font.hight / font.width) > 2)
+ if (font.width && ((double) font.hight / (double) font.width) > 2.0)
return 0;
/* horizontal bars */
@@ -1489,9 +1507,18 @@
obj);
if (number_of_bars != 1)
- return 0;
+ {
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 5,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
- if (((double) font.hight / (double) font.width) > 1.5)
+ if (number_of_bars != 1)
+ return 0;
+ }
+
+ if (((double) font.hight / (double) font.width) > 1.9)
{
font.x1 -= 2;
font.x2 += 2;
@@ -1501,7 +1528,7 @@
return 0;
if (!find_horizintal_top_bar (pix, font, obj, &start2, &end2))
return 0;
- if (start2 > (start + 2))
+ if (start2 > (start + 1))
return 0;
}
@@ -1518,17 +1545,31 @@
int end_of_right_bar;
int start_of_right_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
+ /* if not tall or too tall font not kaf sofit */
+ if (((double) font.hight / (double) font.width) < 1.5 ||
+ ((double) font.hight / (double) font.width) > 3.1)
+ {
+ return 0;
+ }
+
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
&start_of_top_bar, &end_of_top_bar, obj);
if (number_of_bars != 1)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -1562,9 +1603,10 @@
int number_of_bars, start_of_right_bar, end_of_right_bar;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + 2 * font.hight / 3,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 3,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -1582,17 +1624,19 @@
int start_of_right_bar;
/* helps if patach is atached */
- font.y2 += 6;
- font.hight += 6;
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
if (font.width < 10 || font.hight < 15)
return 0;
/* vertical bars */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + 2 * font.hight / 3,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 3,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
/* horizontal bars */
number_of_bars =
@@ -1620,6 +1664,11 @@
end_of_right_bar - 3, font.y2, obj) == 0)
return 0;
+ if (!find_vertical_notch_down_to_left
+ (pix, font.x1, font.y1 + font.hight / 2, font.x1 + font.width / 2,
+ font.y2, obj))
+ return 0;
+
return 1;
}
@@ -1633,26 +1682,26 @@
int start_of_right_bar;
/* helps if patach is atached */
- font.y2 += 2;
- font.y1 -= 2;
- font.hight += 4;
- font.x2 += 2;
- font.x1 -= 2;
- font.width += 4;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
/* is sqare */
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
- &start_of_bottom_bar, &end_of_bottom_bar,
- obj);
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
+ &start_of_bottom_bar,
+ &end_of_bottom_bar, obj);
if (number_of_bars != 2)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 2)
return 0;
@@ -1671,9 +1720,10 @@
font.x2 = font.x1 + font.width / 2;
font.width = font.width / 2;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
font.x1 = start_of_right_bar;
font.width = font.x2 - font.x1;
font.y2 = end_of_bottom_bar;
@@ -1694,6 +1744,11 @@
int end_of_top_bar;
int start_of_top_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
/* not caf */
find_horizintal_top_bar
(pix, font, obj, &start_of_top_bar, &end_of_top_bar);
@@ -1702,14 +1757,17 @@
if (find_vertical_notch_up_to_left (pix,
font.x1, font.y1,
font.x1 + font.width / 2,
- font.y1 + font.hight / 3, obj)
- && find_vertical_notch_up_to_right (pix, font.x1 + font.width / 2,
- font.y1, font.x2,
+ font.y1 + font.hight / 3,
+ obj)
+ && find_vertical_notch_up_to_right (pix,
+ font.x1 +
+ font.width / 2, font.y1,
+ font.x2,
font.y1 + font.hight / 3, obj))
return 0;
/* if wide font check top bar start */
- if (font.width && (font.hight / font.width) < 2)
+ if (font.width && ((double) font.hight / (double) font.width) < 2.1)
{
if (start_of_top_bar < (font.x1 + font.width / 7))
return 0;
@@ -1720,7 +1778,8 @@
(pix, font, obj, &start_of_top_bar, &end_of_top_bar))
{
number_of_bars =
- count_vertical_bars (pix, font, font.y2 - 2,
+ count_vertical_bars (pix, font,
+ font.y2 - 2,
&start_of_right_bar,
&end_of_right_bar, obj);
@@ -1730,9 +1789,10 @@
}
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -1742,26 +1802,53 @@
/* horizontal bars */
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
&start_of_top_bar, &end_of_top_bar, obj);
- if (thin_lines (pix, font, obj) == 0 && number_of_bars != 2)
- return 0;
+ if (!thin_lines (pix, font, obj) && number_of_bars != 2)
+ {
+ number_of_bars =
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 3,
+ &start_of_top_bar,
+ &end_of_top_bar, obj);
- if (font.width && thin_lines (pix, font, obj) == 1
+ if (!thin_lines (pix, font, obj) && number_of_bars != 2)
+ return 0;
+ }
+
+ if (font.width && thin_lines (pix, font, obj)
&& (font.hight / font.width) < 2)
return 0;
if (end_of_top_bar < (font.y2 - font.hight / 6))
return 0;
+ /* not pe */
+ number_of_bars =
+ count_vertical_bars (pix, font, font.y1 + font.hight / 3,
+ &start_of_right_bar, &end_of_right_bar,
+ obj);
+ if (number_of_bars != 1)
+ return 0;
+
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 5,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+
return 1;
}
int
has_nun_sofit_mark (hocr_pixbuf * pix, hocr_box font, unsigned int obj)
{
- if (font.width && font.hight / font.width < 3)
+ if (font.width && (((double) font.hight / (double) font.width) < 2.5))
return 0;
return 1;
@@ -1776,26 +1863,26 @@
int end_of_right_bar;
int start_of_right_bar;
- font.y2 += 2;
- font.y1 -= 2;
- font.hight += 4;
- font.x2 += 2;
- font.x1 -= 2;
- font.width += 4;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
/* is sqare */
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
- &start_of_bottom_bar, &end_of_bottom_bar,
- obj);
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
+ &start_of_bottom_bar,
+ &end_of_bottom_bar, obj);
if (number_of_bars != 2)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 2)
return 0;
@@ -1814,9 +1901,10 @@
font.x2 = font.x1 + font.width / 2;
font.width = font.width / 2;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
font.x1 = start_of_right_bar;
font.width = font.x2 - font.x1;
font.y2 = end_of_bottom_bar;
@@ -1835,11 +1923,17 @@
int end_of_right_bar;
int start_of_right_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
/* vertical bars */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 3,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 3,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 2)
{
@@ -1854,19 +1948,22 @@
}
if (find_horizontal_notch_to_left_down
- (pix, font.x1, font.y2 - font.hight / 2, font.x1 + font.width / 3,
- font.y2, obj) != 1)
+ (pix, font.x1, font.y2 - font.hight / 2,
+ font.x1 + font.width / 3, font.y2, obj) != 1)
return 0;
/* not tzadi */
if (find_horizontal_notch_to_right_down
- (pix, font.x2 - font.width / 2, font.y2 - font.hight / 2, font.x2,
- font.y2, obj) == 1)
+ (pix, font.x2 - font.width / 2, font.y2 - font.hight / 2,
+ font.x2, font.y2, obj) == 1)
return 0;
if (has_black_left_bottom_mark (pix, font, obj) == 0)
return 0;
+ if (has_black_right_bottom_mark (pix, font, obj) == 1)
+ return 0;
+
/* not pe */
if (find_vertical_notch_up_to_right
@@ -1892,20 +1989,23 @@
int end_of_right_bar;
int start_of_right_bar;
- /* helps if patach is atached */
- font.y2 += 3;
- font.hight += 3;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
/* vertical bars */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 6,
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 6,
&start_of_top_bar, &end_of_top_bar, obj);
/* not ayin */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (end_of_right_bar < (font.x1 + 2 * font.width / 3))
return 0;
if (end_of_right_bar < end_of_top_bar)
@@ -1913,7 +2013,8 @@
/* horizontal bars */
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
&start_of_top_bar, &end_of_top_bar, obj);
if (end_of_top_bar < (font.y1 + 2 * font.hight / 3))
@@ -1929,8 +2030,8 @@
/* not shin */
if (find_horizontal_notch_to_left_down
- (pix, font.x1, font.y1 + font.hight / 2, font.x1 + font.width / 2,
- font.y2, obj) != 1)
+ (pix, font.x1, font.y1 + font.hight / 2,
+ font.x1 + font.width / 2, font.y2, obj) != 1)
return 0;
/* not tzadi */
@@ -1939,6 +2040,12 @@
font.y1 + font.hight / 3, obj) == 1)
return 0;
+ /* pe can never be too thin */
+ if (((double) font.hight / (double) font.width) > 2.8)
+ {
+ return 0;
+ }
+
return 1;
}
@@ -1949,11 +2056,17 @@
int end_of_right_bar;
int start_of_right_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
/* vertical bars */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 3,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 3,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 2)
return 0;
if (find_horizontal_path
@@ -1995,12 +2108,13 @@
hocr_box box_down;
/* helps if patach is atached */
- font.y2 += 3;
- font.hight += 3;
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
if (find_horizontal_notch_to_left_down
- (pix, font.x1, font.y2 - font.hight / 2, font.x1 + font.width / 3,
- font.y2, obj) != 1)
+ (pix, font.x1, font.y2 - font.hight / 2,
+ font.x1 + font.width / 3, font.y2, obj) != 1)
return 0;
if (find_horizontal_notch_to_right_up
@@ -2018,9 +2132,10 @@
/* horizontal bars */
number_of_bars =
- count_horizontal_bars (pix, box_down, font.x1 + font.width / 3,
- &start_of_bottom_bar, &end_of_bottom_bar,
- obj);
+ count_horizontal_bars (pix, box_down,
+ font.x1 + font.width / 3,
+ &start_of_bottom_bar,
+ &end_of_bottom_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -2029,8 +2144,8 @@
number_of_bars =
count_vertical_bars (pix, font,
start_of_bottom_bar - 3,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -2040,7 +2155,8 @@
return 0;
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 4,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 4,
&start_of_top_bar, &end_of_top_bar, obj);
if (number_of_bars == 1)
return 0;
@@ -2052,21 +2168,35 @@
has_tzadi_sofit_mark (hocr_pixbuf * pix, hocr_box font, unsigned int obj)
{
int number_of_bars;
- int end_of_top_bar;
- int start_of_top_bar;
int end_of_right_bar;
int start_of_right_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 6,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 6,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 2)
- return 0;
+ {
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 1,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
+ if (number_of_bars != 2)
+ return 0;
+ }
+
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + 5 * font.hight / 6,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + 5 * font.hight / 6,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
@@ -2074,7 +2204,7 @@
{
if (find_horizontal_notch_to_left_down
(pix, font.x1,
- font.y2 - font.hight / 2, font.x1 + font.width / 6,
+ font.y2 - font.hight / 3, font.x1 + font.width / 6,
font.y2, obj) == 1)
return 0;
}
@@ -2091,16 +2221,23 @@
int end_of_right_bar;
int start_of_right_bar;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
&start_of_top_bar, &end_of_top_bar, obj);
if (number_of_bars != 2)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + 7 * font.hight / 8,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + 7 * font.hight / 8,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
if (end_of_right_bar > (font.x1 + font.width / 2))
@@ -2110,6 +2247,20 @@
(pix, font.x1 + font.width / 3, font.y1,
font.x1 + 2 * font.width / 3, font.y1 + font.hight / 3, obj) == 1)
return 0;
+
+ /* not tzadi */
+ if (find_vertical_notch_up_to_left (pix,
+ font.x1, font.y1,
+ font.x1 + font.width / 2,
+ font.y1 + font.hight / 3,
+ obj)
+ && find_vertical_notch_up_to_right (pix,
+ font.x1 +
+ font.width / 2, font.y1,
+ font.x2,
+ font.y1 + font.hight / 3, obj))
+ return 0;
+
return 1;
}
@@ -2124,15 +2275,21 @@
int x_end_of_top_bar;
int x_start_of_top_bar;
+ /* resh is wide font */
+ if (font.width && ((double) font.hight / (double) font.width) > 2.5)
+ return 0;
+
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2,
&start_of_top_bar, &end_of_top_bar, obj);
if (number_of_bars != 1)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start_of_right_bar, &end_of_right_bar,
- obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2,
+ &start_of_right_bar,
+ &end_of_right_bar, obj);
if (number_of_bars != 1)
return 0;
/* this is not top bar */
@@ -2144,9 +2301,10 @@
/* is zain */
number_of_bars =
count_vertical_bars (pix, font,
- (end_of_top_bar + start_of_top_bar) / 2,
- &x_start_of_top_bar, &x_end_of_top_bar,
- obj);
+ (end_of_top_bar +
+ start_of_top_bar) / 2,
+ &x_start_of_top_bar,
+ &x_end_of_top_bar, obj);
if (number_of_bars != 1)
return 0;
if (end_of_right_bar < x_end_of_top_bar)
@@ -2171,14 +2329,15 @@
/* start of font */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start, &end, obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2, &start,
+ &end, obj);
if (number_of_bars != 3)
{
number_of_bars =
count_vertical_bars (pix, font,
- font.y1 + font.hight / 3, &start,
- &end, obj);
+ font.y1 + font.hight / 3,
+ &start, &end, obj);
if (number_of_bars != 3)
return 0;
}
@@ -2205,8 +2364,9 @@
/* start of font */
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start, &end, obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2, &start,
+ &end, obj);
if (number_of_bars < 2)
return 0;
@@ -2228,9 +2388,15 @@
int number_of_bars;
int start, end;
+ /* helps if patach is atached */
+ font.y1 = pix->objects[obj].y1;
+ font.y2 = pix->objects[obj].y2;
+ font.hight = font.y2 - font.y1;
+
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + font.hight / 2,
- &start, &end, obj);
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2, &start,
+ &end, obj);
if (number_of_bars != 2)
return 0;
/* if tav */
@@ -2239,13 +2405,15 @@
font.y2, obj) == 1)
return 0;
number_of_bars =
- count_vertical_bars (pix, font, font.y1 + 2 * font.hight / 3,
+ count_vertical_bars (pix, font,
+ font.y1 + 2 * font.hight / 3,
&start, &end, obj);
if (number_of_bars != 2)
return 0;
number_of_bars =
- count_horizontal_bars (pix, font, font.x1 + font.width / 2,
- &start, &end, obj);
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 2, &start,
+ &end, obj);
if (start > (font.y1 + font.hight / 6))
return 0;
if (end > (font.y1 + font.hight / 2))
@@ -2255,6 +2423,104 @@
return 1;
}
+int
+has_open_brace_mark (hocr_pixbuf * pix, hocr_box font, unsigned int obj)
+{
+ int number_of_bars;
+ int start, end;
+
+ /* brace is a tall font */
+ if (((double) font.hight / (double) font.width) < 2.5)
+ {
+ return 0;
+ }
+
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2, &start,
+ &end, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+
+ if (end < (font.x1 + font.width / 3))
+ return 0;
+
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 5 * font.hight / 6,
+ &start, &end, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+
+ if (start > (font.x1 + font.width / 3))
+ return 0;
+
+ number_of_bars =
+ count_horizontal_bars (pix, font,
+ font.x1 + font.width / 6, &start,
+ &end, obj);
+
+ if (number_of_bars != 2)
+ return 0;
+
+ return 1;
+}
+
+int
+has_close_brace_mark (hocr_pixbuf * pix, hocr_box font, unsigned int obj)
+{
+ int number_of_bars;
+ int start, end;
+
+ /* brace is a tall font */
+ if (((double) font.hight / (double) font.width) < 2.5)
+ {
+ return 0;
+ }
+
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + font.hight / 2, &start,
+ &end, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+
+ if (start > (font.x1 + 2 * font.width / 3))
+ return 0;
+
+ number_of_bars =
+ count_vertical_bars (pix, font,
+ font.y1 + 5 * font.hight / 6,
+ &start, &end, obj);
+
+ if (number_of_bars != 1)
+ return 0;
+
+ if (end < (font.x1 + 2 * font.width / 3))
+ return 0;
+
+ number_of_bars =
+ count_horizontal_bars (pix, font,
+ font.x1 + 5 * font.width / 6,
+ &start, &end, obj);
+
+ if (number_of_bars != 2)
+ return 0;
+
+ if (find_horizontal_notch_to_left_up
+ (pix, font.x1, font.y1, font.x2, font.y1 + font.hight / 3, obj))
+ return 0;
+
+ if (find_horizontal_notch_to_left_down
+ (pix, font.x1, font.y1 + 2 * font.hight / 3, font.x2, font.y2, obj))
+ return 0;
+
+ return 1;
+}
+
/**
* font recognition code
*/
@@ -2277,7 +2543,7 @@
unsigned int box_obj = 0;
unsigned int object_array[MAX_OBJECTS_IN_FONT];
unsigned int box_object_array[MAX_OBJECTS_IN_FONT];
- hocr_object main_object;
+
int low_line_y;
int high_line_y;
int number_of_object_in_font;
@@ -2307,6 +2573,7 @@
/* get line y */
high_line_y = hocr_line_eq_get_y (line_eqs[1], font.x1);
low_line_y = hocr_line_eq_get_y (line_eqs[0], font.x1);
+
/* check if font is in the line ? */
if (font.y1 > low_line_y || font.y2 < high_line_y)
{
@@ -2344,7 +2611,7 @@
/* get font proportions */
short_font = font.hight < (0.8 * (double) avg_font_hight);
tall_font = font.hight > (1.2 * (double) avg_font_hight);
- thin_font = font.width < (0.75 * (double) avg_font_width);
+ thin_font = font.width < (0.85 * (double) avg_font_width);
wide_font = font.width > (1.2 * (double) avg_font_width);
assending_font =
(font.y1 < (high_line_y - (0.1 * (double) avg_font_hight)));
@@ -2358,14 +2625,14 @@
find_horizintal_top_bar (pix, font, obj, &top_bar_start,
&top_bar_end);
has_bottom_bar_font =
- find_horizintal_bottom_bar (pix, font, obj, &bottom_bar_start,
- &bottom_bar_end);
+ find_horizintal_bottom_bar (pix, font, obj,
+ &bottom_bar_start, &bottom_bar_end);
has_left_bar_font =
find_vertical_left_bar (pix, font, obj, &left_bar_start,
&left_bar_end);
has_right_bar_font =
- find_vertical_right_bar (pix, font, obj, &right_bar_start,
- &right_bar_end);
+ find_vertical_right_bar (pix, font, obj,
+ &right_bar_start, &right_bar_end);
/* we asume font has non zero size */
if (font.width && font.hight)
@@ -2395,7 +2662,8 @@
/* check for holam */
if (!chars[0] && (command & HOCR_COMMAND_NIKUD))
{
- if (font.y2 < high_line_y && font.hight < avg_font_hight / 6
+ if (font.y2 < high_line_y
+ && font.hight < avg_font_hight / 6
&& font.width < avg_font_width / 5)
sprintf (chars, "ֹ");
}
@@ -2425,7 +2693,8 @@
{
/* check that this is not yud */
if (find_horizontal_notch_to_left_up
- (pix, font.x1, font.y1, font.x1 + font.width / 2,
+ (pix, font.x1, font.y1,
+ font.x1 + font.width / 2,
font.y1 + 3.0 * (double) font.hight / 4.0,
obj) == 0)
{
@@ -2494,12 +2763,14 @@
lower_object_box.hight =
pix->objects[box_object_array[1]].hight;
upper_object_is_dot =
- ((upper_object_box.hight <= avg_font_hight / 3)
+ ((upper_object_box.hight <=
+ avg_font_hight / 3)
&& (upper_object_box.y1 > high_line_y)
&& (upper_object_box.y2 <=
(high_line_y + 2 * avg_font_hight / 3)));
lower_object_is_dot =
- ((lower_object_box.hight <= avg_font_hight / 3)
+ ((lower_object_box.hight <=
+ avg_font_hight / 3)
&& (lower_object_box.width <=
avg_font_width / 2)
&& (lower_object_box.y1 >
@@ -2538,7 +2809,8 @@
(count_vertical_bars
(pix, upper_object_box,
upper_object_box.y1 +
- upper_object_box.hight / 3, &start, &end,
+ upper_object_box.hight / 3,
+ &start, &end,
box_object_array[0]) == 2)
|| (start >
upper_object_box.x1 +
@@ -2561,13 +2833,29 @@
/* check for hebrew fonts */
+ /* check for assending or dessending thin fonts ( and ) */
+ if (!chars[0])
+ {
+ if ((assending_font || dessending_font) && !high_font)
+ {
+ if (has_open_brace_mark (pix, font, obj))
+ {
+ sprintf (chars, "(");
+ }
+ else if (has_close_brace_mark (pix, font, obj))
+ {
+ sprintf (chars, ")");
+ }
+ }
+ }
+
/* check for assending font ל */
if (!chars[0])
{
if (assending_font)
{
- if (has_lamed_mark (pix, font, obj) && !high_font
- && tall_font)
+ if (has_lamed_mark (pix, font, obj)
+ && !high_font && tall_font)
{
sprintf (chars, "ל");
}
@@ -2583,30 +2871,32 @@
/* theck for dagesh */
if (number_of_object_in_font > 2)
{
- if (pix->objects[object_array[i]].hight <
- (0.2 * (double) avg_font_hight))
+ if (pix->objects[object_array[i]].
+ hight < (0.2 * (double) avg_font_hight))
i = 2;
}
/* check that second part look like a | */
if (pix->objects[object_array[i]].width
- && pix->objects[object_array[i]].y1 > high_line_y
+ && pix->objects[object_array[i]].y1 >
+ high_line_y
&& pix->objects[object_array[i]].width
- && ((double) pix->objects[object_array[i]].hight /
- (double) pix->objects[object_array[i]].width) >
- 1.5
+ && ((double) pix->objects[object_array[i]].
+ hight /
+ (double) pix->objects[object_array[i]].
+ width) > 1.5
&& pix->objects[object_array[i]].x2 <
(font.x1 + font.width / 2))
{
/* if seconde part is dessending it's kof */
if (pix->objects[object_array[i]].y2 >
- (low_line_y + 3))
+ (pix->objects[object_array[0]].y2 + 3))
{
sprintf (chars, "ק");
two_part_font = TRUE;
}
- else if (has_resh_mark (pix, font, obj) ||
- has_dalet_mark (pix, font, obj))
+ else if (has_resh_mark (pix, font, obj)
+ || has_dalet_mark (pix, font, obj))
{
sprintf (chars, "ה");
two_part_font = TRUE;
@@ -2633,15 +2923,23 @@
{
/* check that this is not yud */
if (font.hight && (font.width / font.hight) > 0
- && find_horizontal_notch_to_left_up (pix, font.x1,
- font.y1,
- font.x1 +
- font.width / 2,
- font.y1 +
+ && find_horizontal_notch_to_left_up (pix,
+ font.
+ x1,
+ font.
+ y1,
+ font.
+ x1 +
+ font.
+ width /
+ 2,
+ font.
+ y1 +
3.0 *
- (double) font.
- hight / 4.0,
- obj) == 0)
+ (double)
+ font.
+ hight /
+ 4.0, obj) == 0)
{
sprintf (chars, "'");
}
@@ -2678,6 +2976,10 @@
/* kaf sofit */
sprintf (chars, "ך");
}
+ else if (has_ayin_mark (pix, font, obj))
+ {
+ sprintf (chars, "ע");
+ }
else if (has_tzadi_sofit_mark (pix, font, obj))
{
/* tzadi */
@@ -2703,6 +3005,10 @@
{
sprintf (chars, "ך");
}
+ else if (has_ayin_mark (pix, font, obj))
+ {
+ sprintf (chars, "ע");
+ }
else if (has_pe_sofit_mark (pix, font, obj))
{
sprintf (chars, "ף");
@@ -2711,10 +3017,6 @@
{
sprintf (chars, "ץ");
}
- else if (has_ayin_mark (pix, font, obj))
- {
- sprintf (chars, "ע");
- }
}
else
{
@@ -2740,8 +3042,10 @@
if (!chars[0])
{
/* TODO: check for attached nikud */
- font.y1 = high_line_y;
- font.y2 = low_line_y;
+ if (font.y1 < high_line_y)
+ font.y1 = high_line_y;
+ if (font.y2 > low_line_y)
+ font.y2 = low_line_y;
font.hight = font.y2 - font.y1;
tall_font = FALSE;
}
@@ -2779,8 +3083,8 @@
/* check for font with top bottom right and left bars */
/* mem sofit, samech */
- if (!chars[0] && regular_font && has_top_bar_font && has_bottom_bar_font
- && has_right_bar_font && has_left_bar_font)
+ if (!chars[0] && regular_font && has_top_bar_font
+ && has_bottom_bar_font && has_right_bar_font && has_left_bar_font)
{
if (has_mem_sofit_mark (pix, font, obj))
{
@@ -2794,13 +3098,17 @@
/* check for font with top right and left bars */
/* het and tav */
- if (!chars[0] && regular_font && has_top_bar_font && has_right_bar_font
- && has_left_bar_font)
+ if (!chars[0] && regular_font && has_top_bar_font
+ && has_right_bar_font && has_left_bar_font)
{
if (has_het_mark (pix, font, obj))
{
sprintf (chars, "ח");
}
+ else if (has_mem_mark (pix, font, obj))
+ {
+ sprintf (chars, "מ");
+ }
else if (has_tav_mark (pix, font, obj))
{
sprintf (chars, "ת");
@@ -2809,8 +3117,8 @@
/* check for font with top right and bottom bars */
/* bet caf nun pe */
- if (!chars[0] && regular_font && has_top_bar_font && has_right_bar_font
- && has_bottom_bar_font)
+ if (!chars[0] && regular_font && has_top_bar_font
+ && has_right_bar_font && has_bottom_bar_font)
{
if (has_bet_mark (pix, font, obj))
{
@@ -3001,19 +3309,19 @@
strcat (chars, "ּ");
}
else if (!found_nikud
- && pix->objects[object_array[1]].width >
- (avg_font_width / 3)
- && pix->objects[object_array[1]].hight <
- (avg_font_width / 3))
+ && pix->objects[object_array[1]].
+ width > (avg_font_width / 3)
+ && pix->objects[object_array[1]].
+ hight < (avg_font_width / 3))
{
strcat (chars, "ַ");
found_nikud = TRUE;
}
else if (!found_nikud
- && pix->objects[object_array[1]].width >
- (avg_font_width / 3)
- && pix->objects[object_array[1]].hight >
- (avg_font_width / 3))
+ && pix->objects[object_array[1]].
+ width > (avg_font_width / 3)
+ && pix->objects[object_array[1]].
+ hight > (avg_font_width / 3))
{
strcat (chars, "ָ");
found_nikud = TRUE;
@@ -3030,19 +3338,19 @@
strcat (chars, "ּ");
}
else if (!found_nikud
- && pix->objects[object_array[1]].width >
- (avg_font_width / 3)
- && pix->objects[object_array[2]].hight <
- (avg_font_width / 3))
+ && pix->objects[object_array[1]].
+ width > (avg_font_width / 3)
+ && pix->objects[object_array[2]].
+ hight < (avg_font_width / 3))
{
strcat (chars, "ַ");
found_nikud = TRUE;
}
else if (!found_nikud
- && pix->objects[object_array[1]].width >
- (avg_font_width / 3)
- && pix->objects[object_array[2]].hight >
- (avg_font_width / 3))
+ && pix->objects[object_array[1]].
+ width > (avg_font_width / 3)
+ && pix->objects[object_array[2]].
+ hight > (avg_font_width / 3))
{
strcat (chars, "ָ");
found_nikud = TRUE;
@@ -3060,7 +3368,8 @@
under_font_box.hight = avg_font_hight;
under_font_object =
- hocr_pixbuf_get_objects_inside_box (pix, under_font_box,
+ hocr_pixbuf_get_objects_inside_box (pix,
+ under_font_box,
under_font_object_array);
number_of_object_under_font =
count_object_array (under_font_object_array);
@@ -3101,17 +3410,19 @@
/* shva or tzere */
else if (!found_nikud && number_of_object_under_font == 2)
{
- if (pix->objects[under_font_object_array[0]].hight <
- (avg_font_hight / 3) &&
- pix->objects[under_font_object_array[0]].y2 >
- pix->objects[under_font_object_array[1]].y1)
+ if (pix->objects[under_font_object_array[0]].
+ hight < (avg_font_hight / 3)
+ && pix->objects[under_font_object_array[0]].
+ y2 > pix->objects[under_font_object_array[1]].y1)
{
strcat (chars, "ֵ");
found_nikud = TRUE;
}
- else if (pix->objects[under_font_object_array[0]].
+ else if (pix->
+ objects[under_font_object_array[0]].
hight < (avg_font_hight / 3)
- && pix->objects[under_font_object_array[0]].
+ && pix->
+ objects[under_font_object_array[0]].
y2 <
pix->objects[under_font_object_array[1]].y1)
{
@@ -3128,10 +3439,14 @@
(avg_font_width / 3))
{
/* kubutx */
- if (pix->objects[under_font_object_array[0]].
+ if (pix->
+ objects[under_font_object_array[0]].
y1 <
- pix->objects[under_font_object_array[1]].y1
- && pix->objects[under_font_object_array[1]].
+ pix->
+ objects[under_font_object_array[1]].
+ y1
+ && pix->
+ objects[under_font_object_array[1]].
y1 <
pix->objects[under_font_object_array[2]].y1)
{
@@ -3149,8 +3464,8 @@
else
{
/* pathach */
- if (pix->objects[under_font_object].hight <
- (avg_font_hight / 4))
+ if (pix->objects[under_font_object].
+ hight < (avg_font_hight / 4))
{
strcat (chars, "ֲ");
found_nikud = TRUE;
@@ -3166,8 +3481,8 @@
/* hataf tzere */
else if (!found_nikud && number_of_object_under_font == 5)
{
- if (pix->objects[under_font_object_array[0]].hight <
- (avg_font_hight / 3))
+ if (pix->objects[under_font_object_array[0]].
+ hight < (avg_font_hight / 3))
{
strcat (chars, "ֱ");
}
@@ -3183,7 +3498,8 @@
over_font_box.hight = avg_font_hight / 2;
over_font_object =
- hocr_pixbuf_get_objects_inside_box (pix, over_font_box,
+ hocr_pixbuf_get_objects_inside_box (pix,
+ over_font_box,
over_font_object_array);
number_of_object_over_font =
count_object_array (over_font_object_array);
@@ -3198,8 +3514,10 @@
(avg_font_width / 3) && (thin_font
|| (pix->
objects
- [over_font_object].x2 <
- (over_font_box.x1 +
+ [over_font_object].
+ x2 <
+ (over_font_box.
+ x1 +
avg_font_width / 2))))
{
strcat (chars, "ֹ");
@@ -3223,7 +3541,7 @@
&& (font.x1 < next_font.x2 || font.x2 > prev_font.x1))
{
/* may be an arteffact or part of font */
- sprintf (chars, "");
+ chars[0] = '\0';
}
else
{
Modified: pkg/hocr/trunk/src/hocr.c
==============================================================================
--- pkg/hocr/trunk/src/hocr.c (original)
+++ pkg/hocr/trunk/src/hocr.c Sat Dec 24 22:01:57 2005
@@ -53,7 +53,7 @@
{
for (x = font.x1; x <= font.x2; x++)
{
- if (new_color = hocr_pixbuf_get_object (pix, x, y))
+ if ((new_color = hocr_pixbuf_get_object (pix, x, y)))
printf ("%3d", new_color);
else
printf (" ");
@@ -137,9 +137,9 @@
int
hocr_do_ocr (hocr_pixbuf * pix, hocr_text_buffer * text_buffer)
{
- hocr_box *columns; // [MAX_COLUMNS];
- hocr_box *lines; // [MAX_COLUMNS][MAX_LINES];
- hocr_box *fonts; // [MAX_COLUMNS][MAX_LINES][MAX_FONTS_IN_LINE];
+ hocr_box *columns; /* [MAX_COLUMNS] */
+ hocr_box *lines; /* [MAX_COLUMNS][MAX_LINES] */
+ hocr_box *fonts; /* [MAX_COLUMNS][MAX_LINES][MAX_FONTS_IN_LINE] */
hocr_line_eq line_eqs[MAX_COLUMNS][MAX_LINES][2];
@@ -149,7 +149,6 @@
int num_of_columns_in_page = 0;
int num_of_lines_in_page = 0;
int num_of_fonts_in_page = 0;
- int num_of_regular_fonts_in_page = 0;
int avg_line_hight_in_page = 0;
int avg_line_x_start_in_column[MAX_COLUMNS];
@@ -157,12 +156,9 @@
int avg_diff_between_fonts_in_page = 0;
int avg_font_width_in_page = 0;
int avg_font_hight_in_page = 0;
- int avg_regular_font_width_in_page = 0;
- int avg_regular_font_hight_in_page = 0;
int c;
int i, j, k;
- int y1, y2;
/* need this to put in the text_buffer */
char chars[10];
@@ -171,21 +167,27 @@
int end_of_word = 0;
int tabs = 0;
int indent = 0;
+ int font_number = 0;
+
+ /* init the progress indicators */
+ pix->progress = 0;
+ pix->progress_phase = 0;
/* memory allocation */
- columns = malloc (sizeof (hocr_box) * MAX_COLUMNS);
+ columns = (hocr_box *) malloc (sizeof (hocr_box) * MAX_COLUMNS);
if (!columns)
return -1;
- lines = malloc (sizeof (hocr_box) * MAX_COLUMNS * MAX_LINES);
+ lines = (hocr_box *) malloc (sizeof (hocr_box) * MAX_COLUMNS *
+ MAX_LINES);
if (!lines)
{
free (columns);
return -1;
}
- fonts = malloc (sizeof (hocr_box) * MAX_COLUMNS * MAX_LINES *
- MAX_FONTS_IN_LINE);
+ fonts = (hocr_box *) malloc (sizeof (hocr_box) * MAX_COLUMNS *
+ MAX_LINES * MAX_FONTS_IN_LINE);
if (!fonts)
{
free (lines);
@@ -194,9 +196,11 @@
}
/* create and fill the object map */
+ pix->progress_phase = 1;
hocr_pixbuf_create_object_map (pix);
/* get columns for this page */
+ pix->progress_phase = 2;
fill_columns_array (pix, columns, &num_of_columns_in_page, MAX_COLUMNS);
/* get lines in this column */
@@ -252,15 +256,16 @@
for (i = 0; i < num_of_lines[c]; i++)
{
fill_fonts_array (pix, lines[c * MAX_LINES + i],
- &(fonts[c * MAX_LINES * MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE]),
+ &(fonts
+ [c * MAX_LINES * MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE]),
&(num_of_fonts[c][i]),
MAX_FONTS_IN_LINE);
}
}
/* get size statistics for all fonts for all the lines */
- num_of_regular_fonts_in_page = 0;
+ num_of_fonts_in_page = 0;
avg_font_hight_in_page = 0;
avg_font_width_in_page = 0;
avg_diff_between_fonts_in_page = 0;
@@ -276,15 +281,17 @@
for (j = 0; j < num_of_fonts[c][i]; j++)
{
- num_of_regular_fonts_in_page++;
+ num_of_fonts_in_page++;
avg_font_width_in_page +=
fonts[c * MAX_LINES *
MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE + j].width;
+ i * MAX_FONTS_IN_LINE +
+ j].width;
avg_font_hight_in_page +=
fonts[c * MAX_LINES *
MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE + j].hight;
+ i * MAX_FONTS_IN_LINE +
+ j].hight;
if (j < (num_of_fonts[c][i] - 1))
{
avg_diff_between_fonts_in_page
@@ -292,12 +299,15 @@
(fonts
[c * MAX_LINES *
MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
+ i *
+ MAX_FONTS_IN_LINE +
j].x1 -
fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j + 1].x2);
+ MAX_FONTS_IN_LINE
+ +
+ i *
+ MAX_FONTS_IN_LINE
+ + j + 1].x2);
}
}
}
@@ -309,66 +319,14 @@
}
}
- if (num_of_regular_fonts_in_page != 0)
+ if (num_of_fonts_in_page != 0)
{
- avg_font_width_in_page /= num_of_regular_fonts_in_page;
- avg_font_hight_in_page /= num_of_regular_fonts_in_page;
+ avg_font_width_in_page /= num_of_fonts_in_page;
+ avg_font_hight_in_page /= num_of_fonts_in_page;
- if (num_of_regular_fonts_in_page != 1)
+ if (num_of_fonts_in_page != 1)
avg_diff_between_fonts_in_page /=
- (num_of_regular_fonts_in_page - 1);
- }
-
- /* avg over regular fonts only to get better avg_font_hight_in_page */
- num_of_regular_fonts_in_page = 0;
- avg_regular_font_hight_in_page = 0;
- avg_regular_font_hight_in_page = 0;
- for (c = 0; c < num_of_columns_in_page; c++)
- {
- for (i = 0; i < num_of_lines[c]; i++)
- {
- if (lines[c * MAX_LINES + i].hight >
- (avg_line_hight_in_page -
- 1.5 * MIN_DISTANCE_BETWEEN_LINES))
- {
- for (j = 0; j < num_of_fonts[c][i]; j++)
- {
- if (fonts
- [c * MAX_LINES * MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE + j].hight <
- ((1000 +
- FONT_ASSEND) *
- avg_font_hight_in_page / 1000)
- && fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE + i * MAX_FONTS_IN_LINE +
- j].hight >
- ((1000 -
- FONT_ASSEND) *
- avg_font_hight_in_page / 1000))
- {
- num_of_regular_fonts_in_page++;
- avg_regular_font_width_in_page
- +=
- fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j].width;
- avg_regular_font_hight_in_page
- +=
- fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j].hight;
- }
- }
- }
- }
- }
-
- if (num_of_regular_fonts_in_page != 0)
- {
- avg_regular_font_width_in_page /= num_of_regular_fonts_in_page;
- avg_regular_font_hight_in_page /= num_of_regular_fonts_in_page;
+ (num_of_fonts_in_page - 1);
}
/* get line equations for non horizontal lines */
@@ -380,11 +338,11 @@
continue;
find_font_baseline_eq (lines[c * MAX_LINES + i],
&(fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE]),
+ MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE]),
&(line_eqs[c][i][0]),
&(line_eqs[c][i][1]),
- avg_regular_font_hight_in_page,
+ avg_font_hight_in_page,
num_of_fonts[c][i]);
/* if line is very not horizontal return error */
if ((line_eqs[c][i][0].a *
@@ -425,9 +383,11 @@
/* if arteffact do not recognize */
if (fonts
[c * MAX_LINES * MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE + j].width < 3
+ i * MAX_FONTS_IN_LINE + j].width <
+ 3
|| fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE + i * MAX_FONTS_IN_LINE +
+ MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE +
j].hight < 3)
continue;
if (fonts
@@ -435,25 +395,30 @@
i * MAX_FONTS_IN_LINE + j].width >
(3.5 *
(double)
- avg_regular_font_width_in_page)
+ avg_font_width_in_page)
|| fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE + i * MAX_FONTS_IN_LINE +
+ MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE +
j].hight >
(3.5 *
(double)
- avg_regular_font_hight_in_page))
+ avg_font_hight_in_page))
continue;
color_hocr_box (pix,
fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j], 1, 0);
+ MAX_FONTS_IN_LINE
+ +
+ i *
+ MAX_FONTS_IN_LINE
+ + j], 1, 0);
}
}
}
}
/* do ocr ? */
+ pix->progress_phase = 3;
+ font_number = 0;
if ((pix->command & HOCR_COMMAND_OCR))
{
/* get all fonts for all the lines */
@@ -499,6 +464,15 @@
for (j = 0; j < num_of_fonts[c][i]; j++)
{
+ /* progress the progress indicator */
+ /* start at 128 because 128 is the end of object count */
+ pix->progress =
+ 128 +
+ ((double) font_number /
+ (double) num_of_fonts_in_page)
+ * 127.0;
+ font_number++;
+
if ((j + 1) < num_of_fonts[c][i])
{
/* check for end of word */
@@ -506,23 +480,27 @@
((fonts
[c * MAX_LINES *
MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
+ i *
+ MAX_FONTS_IN_LINE +
j].x1 -
- fonts[c *
- MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j + 1].x2) >
+ fonts[c * MAX_LINES *
+ MAX_FONTS_IN_LINE
+ +
+ i *
+ MAX_FONTS_IN_LINE
+ + j + 1].x2) >
MIN_DISTANCE_BETWEEN_WORDS);
/* check for tabs */
- if (avg_regular_font_width_in_page)
+ if (avg_font_width_in_page)
tabs = (fonts
[c *
MAX_LINES *
- MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE +
- j].x1 -
+ MAX_FONTS_IN_LINE
+ +
+ i *
+ MAX_FONTS_IN_LINE
+ + j].x1 -
fonts[c *
MAX_LINES
*
@@ -534,7 +512,7 @@
1].x2) /
(NUM_OF_FONTS_IN_TAB
*
- avg_regular_font_width_in_page);
+ avg_font_width_in_page);
else
tabs = 0;
}
@@ -542,9 +520,11 @@
/* if arteffact do not recognize */
if (fonts
[c * MAX_LINES * MAX_FONTS_IN_LINE +
- i * MAX_FONTS_IN_LINE + j].width < 3
+ i * MAX_FONTS_IN_LINE + j].width <
+ 3
|| fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE + i * MAX_FONTS_IN_LINE +
+ MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE +
j].hight < 3)
{
/* check for end of word */
@@ -560,13 +540,14 @@
i * MAX_FONTS_IN_LINE + j].width >
(3.5 *
(double)
- avg_regular_font_width_in_page)
+ avg_font_width_in_page)
|| fonts[c * MAX_LINES *
- MAX_FONTS_IN_LINE + i * MAX_FONTS_IN_LINE +
+ MAX_FONTS_IN_LINE +
+ i * MAX_FONTS_IN_LINE +
j].hight >
(3.5 *
(double)
- avg_regular_font_hight_in_page))
+ avg_font_hight_in_page))
{
/* check for end of word */
if (end_of_word)
@@ -579,20 +560,21 @@
/* recognize font */
hocr_recognize_font (pix,
&(fonts[c *
- MAX_LINES
- *
- MAX_FONTS_IN_LINE +
- i *
- MAX_FONTS_IN_LINE]),
+ MAX_LINES
+ *
+ MAX_FONTS_IN_LINE
+ +
+ i *
+ MAX_FONTS_IN_LINE]),
num_of_fonts[c][i],
j, line_eqs[c][i],
- avg_regular_font_hight_in_page,
- avg_regular_font_width_in_page,
+ avg_font_hight_in_page,
+ avg_font_width_in_page,
chars,
pix->command);
/* color unknown fonts in the pixbuf */
- if (!chars[0] || chars[0] == '*'
+ if ((!chars[0] || chars[0] == '*')
&& (pix->
command &
HOCR_COMMAND_COLOR_MISREAD))
Modified: pkg/hocr/trunk/src/hocr_object.c
==============================================================================
--- pkg/hocr/trunk/src/hocr_object.c (original)
+++ pkg/hocr/trunk/src/hocr_object.c Sat Dec 24 22:01:57 2005
@@ -59,7 +59,7 @@
{
int i;
- for (i = 0; i < MAX_OBJECTS_IN_FONT, object_array[i]; i++) ;
+ for (i = 0; i < MAX_OBJECTS_IN_FONT && object_array[i]; i++) ;
return i;
}
Modified: pkg/hocr/trunk/src/hocr_pixbuf.c
==============================================================================
--- pkg/hocr/trunk/src/hocr_pixbuf.c (original)
+++ pkg/hocr/trunk/src/hocr_pixbuf.c Sat Dec 24 22:01:57 2005
@@ -392,7 +392,6 @@
hocr_pixbuf_merge_objects (hocr_pixbuf * pix, unsigned int object1,
unsigned int object2)
{
- int x, y;
unsigned int temp_object;
/* object1 is the smaller */
@@ -461,6 +460,11 @@
{
for (x = 1; x < pix->width; x++)
{
+ /* progress the progress indicator */
+ pix->progress =
+ ((double) y * pix->width +
+ (double) x) / ((double) pix->height *
+ (double) pix->width) * 127.0;
/* if this is part of an object */
if (hocr_pixbuf_get_pixel (pix, x, y) == 1)
{
@@ -492,7 +496,6 @@
1) *
rowstride]].
name;
-
/* check that neigbors are from the same */
if (object1 && object2 && object3
&& (object1 != object2))
@@ -597,8 +600,9 @@
pix->objects[i].y2 - pix->objects[i].y1;
pix->objects[i].width =
pix->objects[i].x2 - pix->objects[i].x1;
-
- if (pix->objects[i].weight < 6 || pix->objects[i].hight < 1 || pix->objects[i].width < 1)
+ if (pix->objects[i].weight < 6
+ || pix->objects[i].hight < 1
+ || pix->objects[i].width < 1)
pix->objects[i].name = 0;
}
else
@@ -609,13 +613,12 @@
/* count objects (bigger then MIN_OBJECT_WEIGHT) in pixbuf */
pix->num_of_objects = hocr_pixbuf_count_objects (pix);
-
return 0;
}
unsigned int
-hocr_pixbuf_get_objects_in_box (hocr_pixbuf * pix, hocr_box box,
- unsigned int *object_array)
+hocr_pixbuf_get_objects_in_box (hocr_pixbuf *
+ pix, hocr_box box, unsigned int *object_array)
{
int x, y;
int i = 0;
@@ -624,15 +627,14 @@
/* make sure none object have zero weight */
pix->objects[0].weight = 0;
-
clean_object_array (object_array);
-
for (x = box.x1; x < box.x2; x++)
for (y = box.y1; y < box.y2; y++)
{
if ((object =
hocr_pixbuf_get_object (pix, x, y)) &&
- !(is_in_object_array (object, object_array))
+ !(is_in_object_array
+ (object, object_array))
&& (i < MAX_OBJECTS_IN_FONT))
{
/* add object to object array */
@@ -649,8 +651,9 @@
}
unsigned int
-hocr_pixbuf_get_objects_inside_box (hocr_pixbuf * pix, hocr_box box,
- unsigned int *object_array)
+hocr_pixbuf_get_objects_inside_box (hocr_pixbuf
+ * pix,
+ hocr_box box, unsigned int *object_array)
{
int x, y;
int i = 0;
@@ -659,9 +662,7 @@
/* make sure none object have zero weight */
pix->objects[0].weight = 0;
-
clean_object_array (object_array);
-
for (x = box.x1; x < box.x2; x++)
for (y = box.y1; y < box.y2; y++)
{
@@ -671,7 +672,8 @@
pix->objects[object].x2 <= box.x2 &&
pix->objects[object].y1 >= box.y1 &&
pix->objects[object].y2 <= box.y2 &&
- !(is_in_object_array (object, object_array))
+ !(is_in_object_array
+ (object, object_array))
&& (i < MAX_OBJECTS_IN_FONT))
{
/* add object to object array */
@@ -692,15 +694,11 @@
{
if (pix->pixels)
free (pix->pixels);
-
if (pix->object_map)
free (pix->object_map);
-
if (pix->objects)
free (pix->objects);
-
if (pix)
free (pix);
-
return 1;
}
Modified: pkg/hocr/trunk/src/hocr_pixbuf.h
==============================================================================
--- pkg/hocr/trunk/src/hocr_pixbuf.h (original)
+++ pkg/hocr/trunk/src/hocr_pixbuf.h Sat Dec 24 22:01:57 2005
@@ -77,15 +77,26 @@
/** objects list */
hocr_object *objects;
- /* number of objects found */
+
+ /** number of objects found */
int num_of_objects;
- /* I/O members */
+ /** commands passed to the ocr */
unsigned char command;
+
+ /** error returned from the ocr */
unsigned char error;
-
+
+ /** progress phase of ocr: object identification, page layout anlisis, .. */
+ unsigned char progress_phase;
+
+ /** progress of ocr: 1..100 */
+ unsigned char progress;
} hocr_pixbuf;
+int
+hocr_line_eq_get_y (hocr_line_eq line, int x);
+
/**
@brief get objects in a box
Modified: pkg/hocr/trunk/src/page_layout.c
==============================================================================
--- pkg/hocr/trunk/src/page_layout.c (original)
+++ pkg/hocr/trunk/src/page_layout.c Sat Dec 24 22:01:57 2005
@@ -152,7 +152,6 @@
{
int x, y;
int sum;
- int inside_font = FALSE;
unsigned int object;
for (x = current_pos - MIN_DISTANCE_BETWEEN_FONTS; x > 0; x--)
More information about the Debian-hebrew-package
mailing list