|
OCR Project
|
SDL2 graphical interface for the OCR crossword solver. More...
#include <SDL2/SDL.h>
#include <SDL2/SDL_ttf.h>
#include "src/cnn/cnn.h"
#include "src/cnn/model.h"
#include "src/preprocess/image.h"
#include "src/segment/segment.h"
#include "src/solver/solver.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
Classes | |
| struct | GuiState |
| Complete application state passed to every GUI function. More... | |
Macros | |
| #define | WIN_W 1280 |
| #define | WIN_H 800 |
| #define | ROW_H 36 |
| #define | ROW_PAD 8 |
| #define | LABEL_W 68 |
| #define | BTN_W 120 |
| #define | FONT_SIZE 15 |
| #define | FONT_SIZE_SM 13 |
| #define | ROW1_Y 8 |
| #define | ROW2_Y (ROW1_Y + ROW_H + ROW_PAD) |
| #define | ROW3_Y (ROW2_Y + ROW_H + ROW_PAD) |
| #define | STATUS_Y (ROW3_Y + ROW_H + 6) |
| #define | PANEL_H (STATUS_Y + FONT_SIZE_SM + 8) |
| #define | INPUT_X (LABEL_W + 4) |
| #define | INPUT_W (WIN_W - INPUT_X - BTN_W - 8 - 8) |
| #define | BTN_X (INPUT_X + INPUT_W + 8) |
| #define | MAX_RESULTS 64 |
| #define | DEFAULT_MODEL_DIR "models/" |
| #define | FOCUS_NONE 0 |
| #define | FOCUS_IMAGE 1 |
| #define | FOCUS_MODEL 2 |
| #define | FOCUS_WORDS 3 |
| #define | TTA_N 5 |
Functions | |
| static void | gui_render (GuiState *g) |
| Composite and present one complete frame. | |
| static TTF_Font * | find_font (int size) |
| Open the first TTF font found in the system font search paths. | |
| static void | fill_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca) |
| Draw a solid filled rectangle. | |
| static void | outline_rect (SDL_Renderer *r, int x, int y, int w, int h, Uint8 cr, Uint8 cg, Uint8 cb, Uint8 ca) |
| Draw a 1-pixel outline rectangle (no fill). | |
| static void | draw_text (GuiState *g, TTF_Font *f, const char *txt, int x, int y, Uint8 cr, Uint8 cg, Uint8 cb) |
| Render a UTF-8 string at pixel position (x, y). | |
| static void | draw_btn (GuiState *g, int btn_id, const char *label, int x, int y, int busy_flag) |
| Draw an action button with a centred text label. | |
| static void | draw_input (GuiState *g, int field_id, const char *buf, const char *placeholder, int row_y) |
| Draw a text-input field with optional placeholder and blinking cursor. | |
| static void | update_geometry (GuiState *g) |
| Recompute the image display rectangle after a load or window change. | |
| static int | to_sx (GuiState *g, int ix) |
| Convert an image-space X coordinate to a screen X coordinate. | |
| static int | to_sy (GuiState *g, int iy) |
| Convert an image-space Y coordinate to a screen Y coordinate. | |
| static void | gui_load_image (GuiState *g, const char *path) |
| Load a PNG image and create the display texture. | |
| static void | gui_load_model (GuiState *g, const char *path) |
| Load CNN weights from a binary model file. | |
| static void | forward_region (const Image *gray, int x1, int y1, int x2, int y2, CNN *net, float *probs) |
| Run one CNN forward pass on a rectangular sub-region of a grayscale image. | |
| static int | recognise_cell (const Image *gray, const BoundingBox *box, int cell_size, CNN *net) |
| Predict the letter in a grid cell using Test-Time Augmentation (TTA). | |
| static void | gui_run_ocr (GuiState *g) |
| Run the full OCR pipeline and word search, then store results. | |
| static void | draw_word_highlight (GuiState *g, const WordResult *r) |
| Overlay a semi-transparent red rectangle on each cell of a found word. | |
| static int | btn_hit (int mx, int my, int row_y) |
| Test whether a mouse position hits the action button on a given row. | |
| static int | input_hit (int mx, int my, int row_y) |
| Test whether a mouse position hits the text-input field on a given row. | |
| static void | set_focus (GuiState *g, int new_focus) |
| Set keyboard focus to a field and start/stop SDL text input. | |
| static void | handle_click (GuiState *g, int mx, int my) |
| Handle a left mouse-button click. | |
| static void | handle_motion (GuiState *g, int mx, int my) |
| Update the hovered-button state on mouse motion. | |
| static char * | active_buf (GuiState *g, size_t *cap) |
| Return a pointer to the text buffer of the currently focused field. | |
| static void | handle_keydown (GuiState *g, SDL_Keycode key) |
| Handle SDL_KEYDOWN events for the focused text field. | |
| static void | handle_text_input (GuiState *g, const char *text) |
| Append SDL_TEXTINPUT characters to the focused field's buffer. | |
| int | main (int argc, char **argv) |
| Entry point for the GUI binary. | |
Variables | |
| static const char *const | FONT_PATHS [] |
SDL2 graphical interface for the OCR crossword solver.
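Provides a 1280×800 window with three text-input rows, one per focusable field: the image path (FOCUS_IMAGE), the model path (FOCUS_MODEL), and the words to search for (FOCUS_WORDS).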
Clicking Charger (or pressing Enter) loads the file. Clicking Chercher (or pressing Enter in the words field) runs the full OCR pipeline and overlays red rectangles on each found-word cell. The original image pixels are never modified.
| #define TTA_N 5 |
|
static |
Return a pointer to the text buffer of the currently focused field.
| g | Application state. |
| cap | Output: byte capacity of the returned buffer. |

|
static |
Test whether a mouse position hits the action button on a given row.
| mx,my | Mouse cursor position in window coordinates. |
| row_y | Top-left Y of the row to test. |

|
static |
Draw an action button with a centred text label.
The button colour changes when hovered (btn_id matches g->hovered_btn) and dims when busy_flag is set.
| g | Application state. |
| btn_id | Button identity (1=load image, 2=load model, 3=search). |
| label | UTF-8 label string displayed on the button. |
| x,y | Top-left pixel of the button (width is always BTN_W). |
| busy_flag | Non-zero while the OCR pipeline is running (dims button). |


|
static |
Draw a text-input field with optional placeholder and blinking cursor.
The field is highlighted with a blue border when it has focus (field_id == g->focused). If buf is empty the placeholder text is rendered in a dim colour. A blinking cursor is drawn after the last character when the field is focused.
| g | Application state. |
| field_id | FOCUS_IMAGE / FOCUS_MODEL / FOCUS_WORDS. |
| buf | Current text content of the field. |
| placeholder | Hint text displayed when buf is empty. |
| row_y | Top-left Y of the row (field is placed at INPUT_X). |


|
static |
Render a UTF-8 string at pixel position (x, y).
Creates a temporary texture from the rendered glyph surface, copies it to the renderer, then destroys it. No-op if f or txt is NULL/empty.
| g | Application state (provides renderer). |
| f | Font to use. |
| txt | UTF-8 string to render. |
| x,y | Top-left pixel of the text. |
| cr,cg,cb | RGB colour. |

|
static |
Overlay a semi-transparent red rectangle on each cell of a found word.
Iterates from (start_r, start_c) to (end_r, end_c) using the direction deltas derived from the WordResult, maps each cell index to a BoundingBox in g->cells, converts the bounding-box centre to screen coordinates via to_sx() / to_sy(), and draws a filled + outlined rectangle scaled by g->disp_scale.
| g | Application state (provides cells, grid_cols, disp_scale, renderer). |
| r | Solver result for one word; no-op if r->found is 0. |


|
static |
Draw a solid filled rectangle.
| r | SDL renderer. |
| x,y | Top-left corner. |
| w,h | Dimensions in pixels. |
| cr,cg,cb,ca | RGBA colour components. |

|
static |
Open the first TTF font found in the system font search paths.
Iterates over FONT_PATHS and returns the first font that can be opened at the requested point size. Intended to avoid a hard dependency on a specific font package.
| size | Desired point size. |

|
static |
Run one CNN forward pass on a rectangular sub-region of a grayscale image.
Copies the region [x1, x2) × [y1, y2) from gray into a temporary Image, binarizes it locally, resizes to CNN_IMG_W × CNN_IMG_H, then calls cnn_forward(). The resulting softmax probabilities are added to probs (not overwritten), allowing TTA accumulation.
| gray | Full grayscale RGBA image (R=G=B=luminance). |
| x1,y1 | Top-left of the region (clamped to image bounds). |
| x2,y2 | Bottom-right exclusive (clamped to image bounds). |
| net | Trained CNN. |
| probs | Array of CNN_N_CLASSES floats; results are added here. |


|
static |
Load a PNG image and create the display texture.
Frees any previously loaded image, texture, and OCR results, then loads the PNG at path via image_load_png(). An SDL texture is created from the raw RGBA pixels (SDL_PIXELFORMAT_RGBA32) without copying them. update_geometry() is called to recompute the display rectangle.
| g | Application state. |
| path | Path to the PNG file. |


|
static |
Load CNN weights from a binary model file.
Allocates g->net if necessary, then calls model_load(). On success g->model_buf already holds the path (set by the caller); the status bar is updated to reflect the loaded filename.
| g | Application state. |
| path | Path to the .bin model file. |


|
static |
Composite and present one complete frame.
Drawing order:
Called every ~16 ms from the event loop and also mid-OCR to show progress.
| g | Application state. |


|
static |
Run the full OCR pipeline and word search, then store results.
Steps performed:
- […] g->image_buf and convert to grayscale.
- […] g->words_buf.
- […] g->cells for highlight rendering.

The status bar is updated at each major step so the render loop can show progress messages. The original image (g->orig_img) is never touched.
| g | Application state — must have orig_img and net set. |


|
static |
Handle a left mouse-button click.
Updates focus based on which field was clicked, then triggers the appropriate action if an action button was hit:
| g | Application state. |
| mx,my | Mouse cursor position. |


|
static |
Handle SDL_KEYDOWN events for the focused text field.
Supported keys:
| g | Application state. |
| key | SDL key symbol. |


|
static |
Update the hovered-button state on mouse motion.
| g | Application state. |
| mx,my | Current mouse cursor position. |


|
static |
Append SDL_TEXTINPUT characters to the focused field's buffer.
SDL delivers printable characters via SDL_TEXTINPUT events (already converted from key codes with correct locale/IME handling). The text is appended only if the buffer has room.
| g | Application state. |
| text | UTF-8 string from the SDL_TEXTINPUT event. |


|
static |
Test whether a mouse position hits the text-input field on a given row.
| mx,my | Mouse cursor position in window coordinates. |
| row_y | Top-left Y of the row to test. |

| int main | ( | int | argc, |
| char ** | argv ) |
Entry point for the GUI binary.
Initialises SDL2 and SDL2_ttf, creates the window and renderer, loads the most recently modified model from models/ (or the path given via --model), then enters the event loop. The loop runs at ~60 fps and dispatches events to the appropriate handler before calling gui_render().
| argc | Argument count. |
| argv | Argument vector. Accepted options: --model <path> — explicit model file. |

|
static |
Draw a 1-pixel outline rectangle (no fill).
| r | SDL renderer. |
| x,y | Top-left corner. |
| w,h | Dimensions in pixels. |
| cr,cg,cb,ca | RGBA colour components. |

|
static |
Predict the letter in a grid cell using Test-Time Augmentation (TTA).
Runs TTA_N forward passes centred on the bounding-box centre, each with a small ±2 px spatial shift, averages the softmax outputs, and returns the argmax class index (0='A' … 25='Z').
The crop window is cell_size × cell_size (grid pitch) so that every letter sees a consistent white border regardless of how tight the connected-component bounding box is. If cell_size is 0, a 35%-padding heuristic is used instead.
| gray | Full grayscale image. |
| box | Tight bounding box returned by the segmenter. |
| cell_size | Grid pitch in pixels (pass 0 to use the padding fallback). |
| net | Trained CNN. |


|
static |
Set keyboard focus to a field and start/stop SDL text input.
| g | Application state. |
| new_focus | FOCUS_IMAGE, FOCUS_MODEL, FOCUS_WORDS, or FOCUS_NONE. |

|
static |
Convert an image-space X coordinate to a screen X coordinate.

|
static |
Convert an image-space Y coordinate to a screen Y coordinate.

|
static |
Recompute the image display rectangle after a load or window change.
Calculates the uniform scale factor that fits g->orig_img inside the image area (below PANEL_H) while preserving the aspect ratio, then stores the top-left offset (g->disp_x, g->disp_y) and g->disp_scale.
| g | Application state; g->orig_img must be non-NULL. |

|
static |