From e9b291078d3725a6e2ed1613066c6f1d7020e08b Mon Sep 17 00:00:00 2001
From: Kat Inskip
Date: Thu, 23 Oct 2025 20:30:53 -0700
Subject: [PATCH] feat: moar ocr

---
NOTE(review): this patch text was recovered from a copy whose line breaks and
angle-bracketed spans had been stripped. Indentation throughout, and the two
usage lines following "cat <<EOF" in the removed ocr.nix hunk, are
reconstructed and must be checked against blob 6dc86626 before applying.
packages/ocr/ocr.sh differs from the original commit (list below), so the
hash on its "index" line is stale.

Changes to packages/ocr/ocr.sh relative to the original commit:
- IMAGE is no longer read after cd-ing into the temp dir, so relative paths
  resolve
- the temp dir is removed on exit via a trap
- options without IMAGE print the usage instead of an unbound-variable error
- the later "--output-type pdfa", which overrode "--output-type=none", is gone
- the usage text (rebuilt from the removed ocr.nix script) goes to stderr

 home/profiles/graphical/ocr.nix | 66 ++---------------------------------
 packages/ocr/default.nix        | 13 ++++++
 packages/ocr/ocr.sh             | 71 +++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 63 deletions(-)
 create mode 100644 packages/ocr/default.nix
 create mode 100755 packages/ocr/ocr.sh

diff --git a/home/profiles/graphical/ocr.nix b/home/profiles/graphical/ocr.nix
index 6dc86626..d9eba974 100644
--- a/home/profiles/graphical/ocr.nix
+++ b/home/profiles/graphical/ocr.nix
@@ -1,65 +1,5 @@
-{
-  pkgs,
-  lib,
-  ...
-}: {
-  home.packages = let
-    inherit (lib.meta) getExe;
-    ocr = pkgs.writeShellScriptBin "ocr" ''
-      set -euo pipefail
-      pushd () {
-        command pushd "$@" > /dev/null
-      }
-
-      popd () {
-        command popd "$@" > /dev/null
-      }
-      args="$(getopt -a -o r: --long rotate: -- "$@")"
-
-      ROTATE=""
-      FORMAT="jpg"
-
-      usage(){
-      cat <<EOF
-      Usage: ocr [options]
-      <image>
-       [ -r input | --rotate input ]: Angle to rotate the image by
-       [ -f input | --format input ]: Intermediary format, defaults to jpg
-      EOF
-        exit 1
-      }
-
-      if [ $# -eq 0 ]; then
-        usage
-        exit 1
-      fi
-
-      eval set -- "''${args}"
-      while :
-      do
-        case $1 in
-          -r | --rotate) ROTATE="$2" ; shift 2 ;;
-          -f | --format) ROTATE="$2" ; shift 2 ;;
-          --) shift; break ;;
-          *) >&2 echo Unsupported option: $1
-             usage ;;
-        esac
-      done
-
-      INTERMEDIARY="./image.''${FORMAT}"
-      pushd $(mktemp -d)
-      if [ -n "''${ROTATE}" ]; then
-        ${getExe pkgs.imagemagick} $1 -rotate "''${ROTATE}" "''${INTERMEDIARY}"
-      else
-        ${getExe pkgs.imagemagick} $1 "''${INTERMEDIARY}"
-      fi
-      ${getExe pkgs.ocrmypdf} -q --rotate-pages-threshold 2.0 --clean --output-type=none \
-        --sidecar tmp.txt --rotate-pages --image-dpi 300 --deskew --output-type pdfa --jobs 4 \
-        "''${INTERMEDIARY}" - > /dev/null
-      printf "%s" "$(< tmp.txt)"
-      popd
-    '';
-  in [
-    ocr
+{pkgs, ...}: {
+  home.packages = [
+    pkgs.ocr
   ];
 }
diff --git a/packages/ocr/default.nix b/packages/ocr/default.nix
new file mode 100644
index 00000000..19905111
--- /dev/null
+++ b/packages/ocr/default.nix
@@ -0,0 +1,13 @@
+{
+  lib,
+  writeShellScriptBin,
+  imagemagick,
+  ocrmypdf,
+}:
+writeShellScriptBin "ocr" ''
+  export PATH="$PATH:${lib.makeBinPath [
+    imagemagick
+    ocrmypdf
+  ]}"
+  exec ${./ocr.sh} "$@"
+''
diff --git a/packages/ocr/ocr.sh b/packages/ocr/ocr.sh
new file mode 100755
index 00000000..62568cec
--- /dev/null
+++ b/packages/ocr/ocr.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# ocr: print the text recognised in an image on stdout.
+#
+# The image is converted (and optionally rotated) by ImageMagick into an
+# intermediary file, which ocrmypdf then reads; only its sidecar text is kept.
+# Requires magick, ocrmypdf and util-linux getopt on PATH.
+
+set -euo pipefail
+
+ROTATE=""
+FORMAT="jpg"
+
+# Print the help text to stderr and exit non-zero.
+usage() {
+  cat >&2 <<EOF
+Usage: ocr [options] IMAGE
+  [ -r input | --rotate input ]: Angle to rotate the image by
+  [ -f input | --format input ]: Intermediary format, defaults to jpg
+EOF
+  exit 1
+}
+
+if [ $# -eq 0 ]; then
+  usage
+fi
+
+# getopt reports malformed options itself; follow its message with the help.
+args="$(getopt -a -o r:f: --long rotate:,format: -- "$@")" || usage
+
+# getopt emits a shell-quoted argument list, so eval is the intended way to
+# load it back into the positional parameters.
+eval set -- "${args}"
+while :; do
+  case "$1" in
+    -r | --rotate) ROTATE="$2"; shift 2 ;;
+    -f | --format) FORMAT="$2"; shift 2 ;;
+    --) shift; break ;;
+    *)
+      >&2 echo "Unsupported option: $1"
+      usage
+      ;;
+  esac
+done
+
+# Options without an image would otherwise trip "set -u" on the unset $1.
+if [ $# -lt 1 ]; then
+  usage
+fi
+INPUT="$1"
+
+# Work with absolute paths inside a private directory instead of cd-ing into
+# it, so a relative IMAGE still resolves; the trap removes it on every exit.
+WORKDIR="$(mktemp -d)"
+trap 'rm -rf -- "${WORKDIR}"' EXIT
+INTERMEDIARY="${WORKDIR}/image.${FORMAT}"
+SIDECAR="${WORKDIR}/tmp.txt"
+
+if [ -n "${ROTATE}" ]; then
+  magick "${INPUT}" -rotate "${ROTATE}" "${INTERMEDIARY}"
+else
+  magick "${INPUT}" "${INTERMEDIARY}"
+fi
+
+# --output-type=none skips PDF generation: only the sidecar text is wanted.
+# "-" fills the output-file argument that ocrmypdf still expects.
+ocrmypdf -q --clean --output-type=none --sidecar "${SIDECAR}" \
+  --image-dpi 300 --deskew --jobs 4 \
+  "${INTERMEDIARY}" - > /dev/null
+# Command substitution trims trailing newlines from the recognised text.
+printf "%s" "$(< "${SIDECAR}")"