SDL2 graphical interface for the OCR crossword solver. More...

#include <SDL2/SDL.h>
#include <SDL2/SDL_ttf.h>
#include "src/cnn/cnn.h"
#include "src/cnn/model.h"
#include "src/preprocess/image.h"
#include "src/segment/segment.h"
#include "src/solver/solver.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

Include dependency graph for gui_main.c:

Classes
struct	GuiState
	Complete application state passed to every GUI function. More...

Macros
#define	WIN_W 1280
#define	WIN_H 800
#define	ROW_H 36
#define	ROW_PAD 8
#define	LABEL_W 68
#define	BTN_W 120
#define	FONT_SIZE 15
#define	FONT_SIZE_SM 13
#define	ROW1_Y 8
#define	ROW2_Y (ROW1_Y + ROW_H + ROW_PAD)
#define	ROW3_Y (ROW2_Y + ROW_H + ROW_PAD)
#define	STATUS_Y (ROW3_Y + ROW_H + 6)
#define	PANEL_H (STATUS_Y + FONT_SIZE_SM + 8)
#define	INPUT_X (LABEL_W + 4)
#define	INPUT_W (WIN_W - INPUT_X - BTN_W - 8 - 8)
#define	BTN_X (INPUT_X + INPUT_W + 8)
#define	MAX_RESULTS 64
#define	DEFAULT_MODEL_DIR "models/"
#define	FOCUS_NONE 0
#define	FOCUS_IMAGE 1
#define	FOCUS_MODEL 2
#define	FOCUS_WORDS 3
#define	TTA_N 5

Functions
static void	gui_render (GuiState *g)
	Composite and present one complete frame.
static TTF_Font *	find_font (int size)
	Open the first TTF font found in the system font search paths.
static void	fill_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca)
	Draw a solid filled rectangle.
static void	outline_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca)
	Draw a 1-pixel outline rectangle (no fill).
static void	draw_text (GuiState *g, TTF_Font *f, const char *txt, int x, int y, Uint8 cr, Uint8 cg, Uint8 cb)
	Render a UTF-8 string at pixel position (x, y).
static void	draw_btn (GuiState *g, int btn_id, const char *label, int x, int y, int busy_flag)
	Draw an action button with a centred text label.
static void	draw_input (GuiState *g, int field_id, const char *buf, const char *placeholder, int row_y)
	Draw a text-input field with optional placeholder and blinking cursor.
static void	update_geometry (GuiState *g)
	Recompute the image display rectangle after a load or window change.
static int	to_sx (GuiState *g, int ix)
	Convert an image-space X coordinate to a screen X coordinate.
static int	to_sy (GuiState *g, int iy)
	Convert an image-space Y coordinate to a screen Y coordinate.
static void	gui_load_image (GuiState *g, const char *path)
	Load a PNG image and create the display texture.
static void	gui_load_model (GuiState *g, const char *path)
	Load CNN weights from a binary model file.
static void	forward_region (const Image *gray, int x1, int y1, int x2, int y2, CNN *net, float *probs)
	Run one CNN forward pass on a rectangular sub-region of a grayscale image.
static int	recognise_cell (const Image *gray, const BoundingBox *box, int cell_size, CNN *net)
	Predict the letter in a grid cell using Test-Time Augmentation (TTA).
static void	gui_run_ocr (GuiState *g)
	Run the full OCR pipeline and word search, then store results.
static void	draw_word_highlight (GuiState *g, const WordResult *r)
	Overlay a semi-transparent red rectangle on each cell of a found word.
static int	btn_hit (int mx, int my, int row_y)
	Test whether a mouse position hits the action button on a given row.
static int	input_hit (int mx, int my, int row_y)
	Test whether a mouse position hits the text-input field on a given row.
static void	set_focus (GuiState *g, int new_focus)
	Set keyboard focus to a field and start/stop SDL text input.
static void	handle_click (GuiState *g, int mx, int my)
	Handle a left mouse-button click.
static void	handle_motion (GuiState *g, int mx, int my)
	Update the hovered-button state on mouse motion.
static char *	active_buf (GuiState *g, size_t *cap)
	Return a pointer to the text buffer of the currently focused field.
static void	handle_keydown (GuiState *g, SDL_Keycode key)
	Handle SDL_KEYDOWN events for the focused text field.
static void	handle_text_input (GuiState *g, const char *text)
	Append SDL_TEXTINPUT characters to the focused field's buffer.
int	main (int argc, char **argv)
	Entry point for the GUI binary.

Variables
static const char *const	FONT_PATHS []

Detailed Description

SDL2 graphical interface for the OCR crossword solver.

Provides a 1280×800 window with three text-input rows:

Image — path to the PNG crossword image
Modèle — path to the trained CNN model (.bin)
Mots — comma-separated list of words to find

Clicking Charger (or pressing Enter) loads the file. Clicking Chercher (or pressing Enter in the words field) runs the full OCR pipeline and overlays red rectangles on each found-word cell. The original image pixels are never modified.

Keyboard shortcuts

Tab — cycle focus between fields
Ctrl+V — paste from clipboard into the focused field
Enter — validate / trigger action for the focused field
Escape — clear focus
Ctrl+Q — quit

Dependencies: SDL2, SDL2_ttf, libpng — plus the project's own CNN / segment / solver.

Usage: ./gui # auto-detects latest model in models/

./gui --model models/foo.bin # explicit model path

Macro Definition Documentation

◆ TTA_N

#define TTA_N 5

Function Documentation

◆ active_buf()

char * active_buf	(	GuiState *	g,
		size_t *	cap )

static

Return a pointer to the text buffer of the currently focused field.

Parameters

g	Application state.
cap	Output: byte capacity of the returned buffer.

Returns: Pointer to the focused buffer, or NULL if no field is focused.

Here is the caller graph for this function:

◆ btn_hit()

int btn_hit	(	int	mx,
		int	my,
		int	row_y )

static

Test whether a mouse position hits the action button on a given row.

Parameters

mx,my	Mouse cursor position in window coordinates.
row_y	Top-left Y of the row to test.

Returns: Non-zero if the button was hit.

Here is the caller graph for this function:

◆ draw_btn()

void draw_btn	(	GuiState *	g,
		int	btn_id,
		const char *	label,
		int	x,
		int	y,
		int	busy_flag )

static

Draw an action button with a centred text label.

The button colour changes when hovered (btn_id matches g->hovered_btn) and dims when busy_flag is set.

Parameters

g	Application state.
btn_id	Button identity (1=load image, 2=load model, 3=search).
label	UTF-8 label string displayed on the button.
x,y	Top-left pixel of the button (width is always BTN_W).
busy_flag	Non-zero while the OCR pipeline is running (dims button).

Here is the call graph for this function:

Here is the caller graph for this function:

◆ draw_input()

void draw_input	(	GuiState *	g,
		int	field_id,
		const char *	buf,
		const char *	placeholder,
		int	row_y )

static

Draw a text-input field with optional placeholder and blinking cursor.

The field is highlighted with a blue border when it has focus (field_id == g->focused). If buf is empty the placeholder text is rendered in a dim colour. A blinking cursor is drawn after the last character when the field is focused.

Parameters

g	Application state.
field_id	FOCUS_IMAGE / FOCUS_MODEL / FOCUS_WORDS.
buf	Current text content of the field.
placeholder	Hint text displayed when `buf` is empty.
row_y	Top-left Y of the row (field is placed at INPUT_X).

Here is the call graph for this function:

Here is the caller graph for this function:

◆ draw_text()

void draw_text	(	GuiState *	g,
		TTF_Font *	f,
		const char *	txt,
		int	x,
		int	y,
		Uint8	cr,
		Uint8	cg,
		Uint8	cb )

static

Render a UTF-8 string at pixel position (x, y).

Creates a temporary texture from the rendered glyph surface, copies it to the renderer, then destroys it. No-op if f or txt is NULL/empty.

Parameters

g	Application state (provides renderer).
f	Font to use.
txt	UTF-8 string to render.
x,y	Top-left pixel of the text.
cr,cg,cb	RGB colour.

Here is the caller graph for this function:

◆ draw_word_highlight()

void draw_word_highlight	(	GuiState *	g,
		const WordResult *	r )

static

Overlay a semi-transparent red rectangle on each cell of a found word.

Iterates from (start_r, start_c) to (end_r, end_c) using the direction deltas derived from the WordResult, maps each cell index to a BoundingBox in g->cells, converts the bounding-box centre to screen coordinates via to_sx() / to_sy(), and draws a filled + outlined rectangle scaled by g->disp_scale.

Parameters

g	Application state (provides cells, grid_cols, disp_scale, renderer).
r	Solver result for one word; no-op if `r->found` is 0.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ fill_rect()

void fill_rect	(	SDL_Renderer *	r,
		int	x,
		int	y,
		int	w,
		int	h,
		Uint8	cr,
		Uint8	cg,
		Uint8	cb,
		Uint8	ca )

static

Draw a solid filled rectangle.

Parameters

r	SDL renderer.
x,y	Top-left corner.
w,h	Dimensions in pixels.
cr,cg,cb,ca	RGBA colour components.

Here is the caller graph for this function:

◆ find_font()

TTF_Font * find_font ( int size )

static

Open the first TTF font found in the system font search paths.

Iterates over FONT_PATHS and returns the first font that can be opened at the requested point size. Intended to avoid a hard dependency on a specific font package.

Parameters

size	Desired point size.

Returns: Opened TTF_Font, or NULL if no font was found. Caller must close with TTF_CloseFont().

Here is the caller graph for this function:

◆ forward_region()

void forward_region	(	const Image *	gray,
		int	x1,
		int	y1,
		int	x2,
		int	y2,
		CNN *	net,
		float *	probs )

static

Run one CNN forward pass on a rectangular sub-region of a grayscale image.

Copies the region [x1, x2) × [y1, y2) from gray into a temporary Image, binarizes it locally, resizes to CNN_IMG_W × CNN_IMG_H, then calls cnn_forward(). The resulting softmax probabilities are added to probs (not overwritten), allowing TTA accumulation.

Parameters

gray	Full grayscale RGBA image (R=G=B=luminance).
x1,y1	Top-left of the region (clamped to image bounds).
x2,y2	Bottom-right corner of the region, exclusive (clamped to image bounds).
net	Trained CNN.
probs	Array of CNN_N_CLASSES floats; results are added here.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gui_load_image()

void gui_load_image	(	GuiState *	g,
		const char *	path )

static

Load a PNG image and create the display texture.

Frees any previously loaded image, texture, and OCR results, then loads the PNG at path via image_load_png(). An SDL texture is created from the raw RGBA pixels (SDL_PIXELFORMAT_RGBA32) without copying them. update_geometry() is called to recompute the display rectangle.

Parameters

g	Application state.
path	Path to the PNG file.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gui_load_model()

void gui_load_model	(	GuiState *	g,
		const char *	path )

static

Load CNN weights from a binary model file.

Allocates g->net if necessary, then calls model_load(). On success g->model_buf already holds the path (set by the caller); the status bar is updated to reflect the loaded filename.

Parameters

g	Application state.
path	Path to the .bin model file.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gui_render()

void gui_render ( GuiState * g )

static

Composite and present one complete frame.

Drawing order:

Dark background.
Control panel (rows 1–3: labels, input fields, buttons; status bar).
Scaled image (if loaded), or a placeholder message.
Word-highlight rectangles for every found WordResult.

Called every ~16 ms from the event loop and also mid-OCR to show progress.

Parameters

g	Application state.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gui_run_ocr()

void gui_run_ocr ( GuiState * g )

static

Run the full OCR pipeline and word search, then store results.

Steps performed:

Reload the image from g->image_buf and convert to grayscale.
Build a binarized buffer for the segmenter (mean-threshold global binarization).
Call segment_image() to detect letter bounding boxes.
Estimate the grid pitch from the first/last cell centres.
Call recognise_cell() for every cell (with TTA).
Build a CharGrid and run solver_find() for each word in g->words_buf.
Store the BoundingBox array in g->cells for highlight rendering.

The status bar is updated at each major step so the render loop can show progress messages. The original image (g->orig_img) is never touched.

Parameters

g	Application state — must have orig_img and net set.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ handle_click()

void handle_click	(	GuiState *	g,
		int	mx,
		int	my )

static

Handle a left mouse-button click.

Updates focus based on which field was clicked, then triggers the appropriate action if an action button was hit:

Row 1 button → gui_load_image()
Row 2 button → gui_load_model()
Row 3 button → gui_run_ocr() (ignored while busy)

Parameters

g	Application state.
mx,my	Mouse cursor position.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ handle_keydown()

void handle_keydown	(	GuiState *	g,
		SDL_Keycode	key )

static

Handle SDL_KEYDOWN events for the focused text field.

Supported keys:

Backspace — delete the last character.
Enter — validate the field (load file or run OCR).
Ctrl+V — paste clipboard text (newlines stripped).
Escape — clear focus.
Tab — cycle focus to the next field.

Parameters

g	Application state.
key	SDL key symbol.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ handle_motion()

void handle_motion	(	GuiState *	g,
		int	mx,
		int	my )

static

Update the hovered-button state on mouse motion.

Parameters

g	Application state.
mx,my	Current mouse cursor position.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ handle_text_input()

void handle_text_input	(	GuiState *	g,
		const char *	text )

static

Append SDL_TEXTINPUT characters to the focused field's buffer.

SDL delivers printable characters via SDL_TEXTINPUT events (already converted from key codes with correct locale/IME handling). The text is appended only if the buffer has room.

Parameters

g	Application state.
text	UTF-8 string from the SDL_TEXTINPUT event.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ input_hit()

int input_hit	(	int	mx,
		int	my,
		int	row_y )

static

Test whether a mouse position hits the text-input field on a given row.

Parameters

mx,my	Mouse cursor position in window coordinates.
row_y	Top-left Y of the row to test.

Returns: Non-zero if the field was hit.

Here is the caller graph for this function:

◆ main()

int main	(	int	argc,
		char **	argv )

Entry point for the GUI binary.

Initialises SDL2 and SDL2_ttf, creates the window and renderer, loads the most recently modified model from models/ (or the path given via --model), then enters the event loop. The loop runs at ~60 fps and dispatches events to the appropriate handler before calling gui_render().

Parameters

argc	Argument count.
argv	Argument vector. Accepted options: `--model` `<path>` — explicit model file.

Returns: 0 on clean exit, 1 on SDL/TTF initialisation failure.

Here is the call graph for this function:

◆ outline_rect()

void outline_rect	(	SDL_Renderer *	r,
		int	x,
		int	y,
		int	w,
		int	h,
		Uint8	cr,
		Uint8	cg,
		Uint8	cb,
		Uint8	ca )

static

Draw a 1-pixel outline rectangle (no fill).

Parameters

r	SDL renderer.
x,y	Top-left corner.
w,h	Dimensions in pixels.
cr,cg,cb,ca	RGBA colour components.

Here is the caller graph for this function:

◆ recognise_cell()

int recognise_cell	(	const Image *	gray,
		const BoundingBox *	box,
		int	cell_size,
		CNN *	net )

static

Predict the letter in a grid cell using Test-Time Augmentation (TTA).

Runs TTA_N forward passes centred on the bounding-box centre, each with a small ±2 px spatial shift, averages the softmax outputs, and returns the argmax class index (0='A' … 25='Z').

The crop window is cell_size × cell_size (grid pitch) so that every letter sees a consistent white border regardless of how tight the connected-component bounding box is. If cell_size is 0, a 35%-padding heuristic is used instead.

Parameters

gray	Full grayscale image.
box	Tight bounding box returned by the segmenter.
cell_size	Grid pitch in pixels (pass 0 to use the padding fallback).
net	Trained CNN.

Returns: Class index in [0, 25], or 0 on degenerate input.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ set_focus()

void set_focus	(	GuiState *	g,
		int	new_focus )

static

Set keyboard focus to a field and start/stop SDL text input.

Parameters

g	Application state.
new_focus	FOCUS_IMAGE, FOCUS_MODEL, FOCUS_WORDS, or FOCUS_NONE.

Here is the caller graph for this function:

◆ to_sx()

int to_sx	(	GuiState *	g,
		int	ix )

static

Convert an image-space X coordinate to a screen X coordinate.

Here is the caller graph for this function:

◆ to_sy()

int to_sy	(	GuiState *	g,
		int	iy )

static

Convert an image-space Y coordinate to a screen Y coordinate.

Here is the caller graph for this function:

◆ update_geometry()

void update_geometry ( GuiState * g )

static

Recompute the image display rectangle after a load or window change.

Calculates the uniform scale factor that fits g->orig_img inside the image area (below PANEL_H) while preserving the aspect ratio, then stores the top-left offset (g->disp_x, g->disp_y) and g->disp_scale.

Parameters

g	Application state; `g->orig_img` must be non-NULL.

Here is the caller graph for this function:

Variable Documentation

◆ FONT_PATHS

const char* const FONT_PATHS[]

static

Initial value:

                                   = {
    "/usr/share/fonts/TTF/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/TTF/LiberationSans-Regular.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
    "/usr/share/fonts/liberation-sans/LiberationSans-Regular.ttf",
    "/usr/share/fonts/TTF/FreeSans.ttf",
    "/usr/share/fonts/gnu-free/FreeSans.ttf",
    "/usr/share/fonts/noto/NotoSans-Regular.ttf",
    "/usr/share/fonts/truetype/noto/NotoSans-Regular.ttf",
    NULL
}

Classes

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ TTA_N

Function Documentation

◆ active_buf()

◆ btn_hit()

◆ draw_btn()

◆ draw_input()

◆ draw_text()

◆ draw_word_highlight()

◆ fill_rect()

◆ find_font()

◆ forward_region()

◆ gui_load_image()

◆ gui_load_model()

◆ gui_render()

◆ gui_run_ocr()

◆ handle_click()

◆ handle_keydown()

◆ handle_motion()

◆ handle_text_input()

◆ input_hit()

◆ main()

◆ outline_rect()

◆ recognise_cell()

◆ set_focus()

◆ to_sx()

◆ to_sy()

◆ update_geometry()

Variable Documentation

◆ FONT_PATHS