OCR Project
Loading...
Searching...
No Matches
gui_main.c File Reference

SDL2 graphical interface for the OCR crossword solver. More...

#include <SDL2/SDL.h>
#include <SDL2/SDL_ttf.h>
#include "src/cnn/cnn.h"
#include "src/cnn/model.h"
#include "src/preprocess/image.h"
#include "src/segment/segment.h"
#include "src/solver/solver.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
Include dependency graph for gui_main.c:

Classes

struct  GuiState
 Complete application state passed to every GUI function. More...

Macros

#define WIN_W   1280
#define WIN_H   800
#define ROW_H   36
#define ROW_PAD   8
#define LABEL_W   68
#define BTN_W   120
#define FONT_SIZE   15
#define FONT_SIZE_SM   13
#define ROW1_Y   8
#define ROW2_Y   (ROW1_Y + ROW_H + ROW_PAD)
#define ROW3_Y   (ROW2_Y + ROW_H + ROW_PAD)
#define STATUS_Y   (ROW3_Y + ROW_H + 6)
#define PANEL_H   (STATUS_Y + FONT_SIZE_SM + 8)
#define INPUT_X   (LABEL_W + 4)
#define INPUT_W   (WIN_W - INPUT_X - BTN_W - 8 - 8)
#define BTN_X   (INPUT_X + INPUT_W + 8)
#define MAX_RESULTS   64
#define DEFAULT_MODEL_DIR   "models/"
#define FOCUS_NONE   0
#define FOCUS_IMAGE   1
#define FOCUS_MODEL   2
#define FOCUS_WORDS   3
#define TTA_N   5

Functions

static void gui_render (GuiState *g)
 Composite and present one complete frame.
static TTF_Font * find_font (int size)
 Open the first TTF font found in the system font search paths.
static void fill_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca)
 Draw a solid filled rectangle.
static void outline_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca)
 Draw a 1-pixel outline rectangle (no fill).
static void draw_text (GuiState *g, TTF_Font *f, const char *txt, int x, int y, Uint8 cr, Uint8 cg, Uint8 cb)
 Render a UTF-8 string at pixel position (x, y).
static void draw_btn (GuiState *g, int btn_id, const char *label, int x, int y, int busy_flag)
 Draw an action button with a centred text label.
static void draw_input (GuiState *g, int field_id, const char *buf, const char *placeholder, int row_y)
 Draw a text-input field with optional placeholder and blinking cursor.
static void update_geometry (GuiState *g)
 Recompute the image display rectangle after a load or window change.
static int to_sx (GuiState *g, int ix)
 Convert an image-space X coordinate to a screen X coordinate.
static int to_sy (GuiState *g, int iy)
 Convert an image-space Y coordinate to a screen Y coordinate.
static void gui_load_image (GuiState *g, const char *path)
 Load a PNG image and create the display texture.
static void gui_load_model (GuiState *g, const char *path)
 Load CNN weights from a binary model file.
static void forward_region (const Image *gray, int x1, int y1, int x2, int y2, CNN *net, float *probs)
 Run one CNN forward pass on a rectangular sub-region of a grayscale image.
static int recognise_cell (const Image *gray, const BoundingBox *box, int cell_size, CNN *net)
 Predict the letter in a grid cell using Test-Time Augmentation (TTA).
static void gui_run_ocr (GuiState *g)
 Run the full OCR pipeline and word search, then store results.
static void draw_word_highlight (GuiState *g, const WordResult *r)
 Overlay a semi-transparent red rectangle on each cell of a found word.
static int btn_hit (int mx, int my, int row_y)
 Test whether a mouse position hits the action button on a given row.
static int input_hit (int mx, int my, int row_y)
 Test whether a mouse position hits the text-input field on a given row.
static void set_focus (GuiState *g, int new_focus)
 Set keyboard focus to a field and start/stop SDL text input.
static void handle_click (GuiState *g, int mx, int my)
 Handle a left mouse-button click.
static void handle_motion (GuiState *g, int mx, int my)
 Update the hovered-button state on mouse motion.
static char * active_buf (GuiState *g, size_t *cap)
 Return a pointer to the text buffer of the currently focused field.
static void handle_keydown (GuiState *g, SDL_Keycode key)
 Handle SDL_KEYDOWN events for the focused text field.
static void handle_text_input (GuiState *g, const char *text)
 Append SDL_TEXTINPUT characters to the focused field's buffer.
int main (int argc, char **argv)
 Entry point for the GUI binary.

Variables

static const char *const FONT_PATHS []

Detailed Description

SDL2 graphical interface for the OCR crossword solver.

Provides a 1280×800 window with three text-input rows:

  • Image — path to the PNG crossword image
  • Modèle — path to the trained CNN model (.bin)
  • Mots — comma-separated list of words to find

Clicking Charger (or pressing Enter) loads the file. Clicking Chercher (or pressing Enter in the words field) runs the full OCR pipeline and overlays red rectangles on each found-word cell. The original image pixels are never modified.

Keyboard shortcuts
  • Tab — cycle focus between fields
  • Ctrl+V — paste from clipboard into the focused field
  • Enter — validate / trigger action for the focused field
  • Escape — clear focus
  • Ctrl+Q — quit
Dependencies
SDL2, SDL2_ttf, libpng — plus the project's own CNN / segment / solver.
Usage
./gui # auto-detects latest model in models/
./gui --model models/foo.bin # explicit model path

Macro Definition Documentation

◆ TTA_N

#define TTA_N   5

Function Documentation

◆ active_buf()

char * active_buf ( GuiState * g,
size_t * cap )
static

Return a pointer to the text buffer of the currently focused field.

Parameters
g — Application state.
cap — Output: byte capacity of the returned buffer.
Returns
Pointer to the focused buffer, or NULL if no field is focused.
Here is the caller graph for this function:

◆ btn_hit()

int btn_hit ( int mx,
int my,
int row_y )
static

Test whether a mouse position hits the action button on a given row.

Parameters
mx, my — Mouse cursor position in window coordinates.
row_y — Top Y coordinate of the row to test.
Returns
Non-zero if the button was hit.
Here is the caller graph for this function:

◆ draw_btn()

void draw_btn ( GuiState * g,
int btn_id,
const char * label,
int x,
int y,
int busy_flag )
static

Draw an action button with a centred text label.

The button colour changes when hovered (btn_id matches g->hovered_btn) and dims when busy_flag is set.

Parameters
g — Application state.
btn_id — Button identity (1=load image, 2=load model, 3=search).
label — UTF-8 label string displayed on the button.
x, y — Top-left pixel of the button (width is always BTN_W).
busy_flag — Non-zero while the OCR pipeline is running (dims button).
Here is the call graph for this function:
Here is the caller graph for this function:

◆ draw_input()

void draw_input ( GuiState * g,
int field_id,
const char * buf,
const char * placeholder,
int row_y )
static

Draw a text-input field with optional placeholder and blinking cursor.

The field is highlighted with a blue border when it has focus (field_id == g->focused). If buf is empty the placeholder text is rendered in a dim colour. A blinking cursor is drawn after the last character when the field is focused.

Parameters
g — Application state.
field_id — FOCUS_IMAGE / FOCUS_MODEL / FOCUS_WORDS.
buf — Current text content of the field.
placeholder — Hint text displayed when buf is empty.
row_y — Top Y coordinate of the row (field is placed at INPUT_X).
Here is the call graph for this function:
Here is the caller graph for this function:

◆ draw_text()

void draw_text ( GuiState * g,
TTF_Font * f,
const char * txt,
int x,
int y,
Uint8 cr,
Uint8 cg,
Uint8 cb )
static

Render a UTF-8 string at pixel position (x, y).

Creates a temporary texture from the rendered glyph surface, copies it to the renderer, then destroys it. No-op if f is NULL, or if txt is NULL or empty.

Parameters
g — Application state (provides renderer).
f — Font to use.
txt — UTF-8 string to render.
x, y — Top-left pixel of the text.
cr, cg, cb — RGB colour.
Here is the caller graph for this function:

◆ draw_word_highlight()

void draw_word_highlight ( GuiState * g,
const WordResult * r )
static

Overlay a semi-transparent red rectangle on each cell of a found word.

Iterates from (start_r, start_c) to (end_r, end_c) using the direction deltas derived from the WordResult, maps each cell index to a BoundingBox in g->cells, converts the bounding-box centre to screen coordinates via to_sx() / to_sy(), and draws a filled + outlined rectangle scaled by g->disp_scale.

Parameters
g — Application state (provides cells, grid_cols, disp_scale, renderer).
r — Solver result for one word; no-op if r->found is 0.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ fill_rect()

void fill_rect ( SDL_Renderer * r,
int x,
int y,
int w,
int h,
Uint8 cr,
Uint8 cg,
Uint8 cb,
Uint8 ca )
static

Draw a solid filled rectangle.

Parameters
r — SDL renderer.
x, y — Top-left corner.
w, h — Dimensions in pixels.
cr, cg, cb, ca — RGBA colour components.
Here is the caller graph for this function:

◆ find_font()

TTF_Font * find_font ( int size)
static

Open the first TTF font found in the system font search paths.

Iterates over FONT_PATHS and returns the first font that can be opened at the requested point size. Intended to avoid a hard dependency on a specific font package.

Parameters
size — Desired point size.
Returns
Opened TTF_Font, or NULL if no font was found. Caller must close with TTF_CloseFont().
Here is the caller graph for this function:

◆ forward_region()

void forward_region ( const Image * gray,
int x1,
int y1,
int x2,
int y2,
CNN * net,
float * probs )
static

Run one CNN forward pass on a rectangular sub-region of a grayscale image.

Copies the region [x1, x2) × [y1, y2) from gray into a temporary Image, binarizes it locally, resizes to CNN_IMG_W × CNN_IMG_H, then calls cnn_forward(). The resulting softmax probabilities are added to probs (not overwritten), allowing TTA accumulation.

Parameters
gray — Full grayscale RGBA image (R=G=B=luminance).
x1, y1 — Top-left of the region (clamped to image bounds).
x2, y2 — Bottom-right of the region, exclusive (clamped to image bounds).
net — Trained CNN.
probs — Array of CNN_N_CLASSES floats; results are added here.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gui_load_image()

void gui_load_image ( GuiState * g,
const char * path )
static

Load a PNG image and create the display texture.

Frees any previously loaded image, texture, and OCR results, then loads the PNG at path via image_load_png(). An SDL texture is created from the raw RGBA pixels (SDL_PIXELFORMAT_RGBA32) without copying them. update_geometry() is called to recompute the display rectangle.

Parameters
g — Application state.
path — Path to the PNG file.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gui_load_model()

void gui_load_model ( GuiState * g,
const char * path )
static

Load CNN weights from a binary model file.

Allocates g->net if necessary, then calls model_load(). On success g->model_buf already holds the path (set by the caller); the status bar is updated to reflect the loaded filename.

Parameters
g — Application state.
path — Path to the .bin model file.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gui_render()

void gui_render ( GuiState * g)
static

Composite and present one complete frame.

Drawing order:

  1. Dark background.
  2. Control panel (rows 1–3: labels, input fields, buttons; status bar).
  3. Scaled image (if loaded), or a placeholder message.
  4. Word-highlight rectangles for every found WordResult.

Called every ~16 ms from the event loop and also mid-OCR to show progress.

Parameters
g — Application state.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ gui_run_ocr()

void gui_run_ocr ( GuiState * g)
static

Run the full OCR pipeline and word search, then store results.

Steps performed:

  1. Reload the image from g->image_buf and convert to grayscale.
  2. Build a binarized buffer for the segmenter (mean-threshold global binarization).
  3. Call segment_image() to detect letter bounding boxes.
  4. Estimate the grid pitch from the first/last cell centres.
  5. Call recognise_cell() for every cell (with TTA).
  6. Build a CharGrid and run solver_find() for each word in g->words_buf.
  7. Store the BoundingBox array in g->cells for highlight rendering.

The status bar is updated at each major step so the render loop can show progress messages. The original image (g->orig_img) is never touched.

Parameters
g — Application state; must have orig_img and net set.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_click()

void handle_click ( GuiState * g,
int mx,
int my )
static

Handle a left mouse-button click.

Updates focus based on which field was clicked, then triggers the appropriate action if an action button was hit (load image, load model, or search).

Parameters
g — Application state.
mx, my — Mouse cursor position.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_keydown()

void handle_keydown ( GuiState * g,
SDL_Keycode key )
static

Handle SDL_KEYDOWN events for the focused text field.

Supported keys:

  • Backspace — delete the last character.
  • Enter — validate the field (load file or run OCR).
  • Ctrl+V — paste clipboard text (newlines stripped).
  • Escape — clear focus.
  • Tab — cycle focus to the next field.
Parameters
g — Application state.
key — SDL key symbol.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_motion()

void handle_motion ( GuiState * g,
int mx,
int my )
static

Update the hovered-button state on mouse motion.

Parameters
g — Application state.
mx, my — Current mouse cursor position.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ handle_text_input()

void handle_text_input ( GuiState * g,
const char * text )
static

Append SDL_TEXTINPUT characters to the focused field's buffer.

SDL delivers printable characters via SDL_TEXTINPUT events (already converted from key codes with correct locale/IME handling). The text is appended only if the buffer has room.

Parameters
g — Application state.
text — UTF-8 string from the SDL_TEXTINPUT event.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ input_hit()

int input_hit ( int mx,
int my,
int row_y )
static

Test whether a mouse position hits the text-input field on a given row.

Parameters
mx, my — Mouse cursor position in window coordinates.
row_y — Top Y coordinate of the row to test.
Returns
Non-zero if the field was hit.
Here is the caller graph for this function:

◆ main()

int main ( int argc,
char ** argv )

Entry point for the GUI binary.

Initialises SDL2 and SDL2_ttf, creates the window and renderer, loads the most recently modified model from models/ (or the path given via --model), then enters the event loop. The loop runs at ~60 fps and dispatches events to the appropriate handler before calling gui_render().

Parameters
argc — Argument count.
argv — Argument vector. Accepted option: --model <path> (explicit model file).
Returns
0 on clean exit, 1 on SDL/TTF initialisation failure.
Here is the call graph for this function:

◆ outline_rect()

void outline_rect ( SDL_Renderer * r,
int x,
int y,
int w,
int h,
Uint8 cr,
Uint8 cg,
Uint8 cb,
Uint8 ca )
static

Draw a 1-pixel outline rectangle (no fill).

Parameters
r — SDL renderer.
x, y — Top-left corner.
w, h — Dimensions in pixels.
cr, cg, cb, ca — RGBA colour components.
Here is the caller graph for this function:

◆ recognise_cell()

int recognise_cell ( const Image * gray,
const BoundingBox * box,
int cell_size,
CNN * net )
static

Predict the letter in a grid cell using Test-Time Augmentation (TTA).

Runs TTA_N forward passes centred on the bounding-box centre, each with a small ±2 px spatial shift, averages the softmax outputs, and returns the argmax class index (0='A' … 25='Z').

The crop window is cell_size × cell_size (grid pitch) so that every letter sees a consistent white border regardless of how tight the connected-component bounding box is. If cell_size is 0, a 35%-padding heuristic is used instead.

Parameters
gray — Full grayscale image.
box — Tight bounding box returned by the segmenter.
cell_size — Grid pitch in pixels (pass 0 to use the padding fallback).
net — Trained CNN.
Returns
Class index in [0, 25], or 0 on degenerate input.
Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_focus()

void set_focus ( GuiState * g,
int new_focus )
static

Set keyboard focus to a field and start/stop SDL text input.

Parameters
g — Application state.
new_focus — FOCUS_IMAGE, FOCUS_MODEL, FOCUS_WORDS, or FOCUS_NONE.
Here is the caller graph for this function:

◆ to_sx()

int to_sx ( GuiState * g,
int ix )
static

Convert an image-space X coordinate to a screen X coordinate.

Here is the caller graph for this function:

◆ to_sy()

int to_sy ( GuiState * g,
int iy )
static

Convert an image-space Y coordinate to a screen Y coordinate.

Here is the caller graph for this function:

◆ update_geometry()

void update_geometry ( GuiState * g)
static

Recompute the image display rectangle after a load or window change.

Calculates the uniform scale factor that fits g->orig_img inside the image area (below PANEL_H) while preserving the aspect ratio, then stores the top-left offset (g->disp_x, g->disp_y) and g->disp_scale.

Parameters
g — Application state; g->orig_img must be non-NULL.
Here is the caller graph for this function:

Variable Documentation

◆ FONT_PATHS

const char* const FONT_PATHS[]
static
Initial value:
= {
"/usr/share/fonts/TTF/DejaVuSans.ttf",
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
"/usr/share/fonts/dejavu/DejaVuSans.ttf",
"/usr/share/fonts/TTF/LiberationSans-Regular.ttf",
"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
"/usr/share/fonts/liberation-sans/LiberationSans-Regular.ttf",
"/usr/share/fonts/TTF/FreeSans.ttf",
"/usr/share/fonts/gnu-free/FreeSans.ttf",
"/usr/share/fonts/noto/NotoSans-Regular.ttf",
"/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
NULL
}