life-todo

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 4d512d7d7bb099284c4ae1746f41da5a8411f631
parent 2cdace311ec8f454534f1dfd6ff3a0359125bd1a
Author: Michael Percival <m@michaelpercival.xyz>
Date:   Tue, 14 Apr 2026 14:06:12 +0100

Add voice message transcription via faster-whisper

Telegram voice messages are downloaded, transcribed with faster-whisper,
and passed to Claude as plain text. Model is configurable via WHISPER_MODEL
(default: small).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Diffstat:
M .env.example          |  1 +
M scripts/setup.sh      | 17 ++++++++++++++---
A scripts/transcribe.py | 21 +++++++++++++++++++++
M src/bot.js            | 13 +++++++++----
A src/transcribe.js     | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/.env.example b/.env.example
@@ -1,3 +1,4 @@
 TELEGRAM_BOT_TOKEN=your_telegram_bot_token_here
 ANTHROPIC_API_KEY=your_anthropic_api_key_here
 ALLOWED_USER_IDS=123456789
+WHISPER_MODEL=small
diff --git a/scripts/setup.sh b/scripts/setup.sh
@@ -34,9 +34,20 @@ echo "Installing npm dependencies..."
 npm install
 echo "✓ Dependencies installed"
 
-# Create data directory (gitignored)
-mkdir -p data/conversations
-echo "✓ data/conversations directory ready"
+# Create data directories (gitignored)
+mkdir -p data/conversations data/tmp
+echo "✓ data directories ready"
+
+# Check Python + faster-whisper for voice transcription
+if ! command -v python3 &>/dev/null; then
+  echo "WARNING: python3 not found — voice messages will not be transcribed."
+  echo " Install Python 3 and run: pip install faster-whisper"
+elif ! python3 -c "import faster_whisper" &>/dev/null; then
+  echo "WARNING: faster-whisper not installed — voice messages will not be transcribed."
+  echo " Run: pip install faster-whisper"
+else
+  echo "✓ faster-whisper available"
+fi
 
 # Check SSH key for git push
 if ! ssh-add -l &>/dev/null && [ -z "$SSH_AUTH_SOCK" ]; then
diff --git a/scripts/transcribe.py b/scripts/transcribe.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+"""Transcribe an audio file using faster-whisper. Prints transcription to stdout."""
+
+import sys
+import os
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: transcribe.py <audio_file>", file=sys.stderr)
+        sys.exit(1)
+
+    audio_path = sys.argv[1]
+    model_size = os.environ.get("WHISPER_MODEL", "small")
+
+    from faster_whisper import WhisperModel
+    model = WhisperModel(model_size, device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(audio_path)
+    print(" ".join(seg.text.strip() for seg in segments))
+
+if __name__ == "__main__":
+    main()
diff --git a/src/bot.js b/src/bot.js
@@ -3,6 +3,7 @@ const claude = require('./claude');
 const todo = require('./todo');
 const git = require('./git');
 const state = require('./state');
+const { transcribeVoice } = require('./transcribe');
 
 const ALLOWED_USER_IDS = process.env.ALLOWED_USER_IDS
   ? process.env.ALLOWED_USER_IDS.split(',').map(id => parseInt(id.trim(), 10))
@@ -19,9 +20,9 @@ function createBot(token) {
   bot.on('message', async (msg) => {
     const userId = msg.from.id;
     const chatId = msg.chat.id;
-    const text = msg.text;
+    let text = msg.text;
 
-    if (!text) return; // ignore stickers, photos, etc.
+    if (!text && !msg.voice) return; // ignore stickers, photos, etc.
 
     if (!isAllowed(userId)) {
       await bot.sendMessage(chatId, 'Unauthorized.');
@@ -29,10 +30,14 @@ function createBot(token) {
     }
 
     try {
-      // Show typing indicator while Claude thinks
       await bot.sendChatAction(chatId, 'typing');
 
-      console.log(`[user:${userId}] ${text}`);
+      if (msg.voice) {
+        text = await transcribeVoice(bot, msg.voice.file_id);
+        console.log(`[user:${userId}] (voice) ${text}`);
+      } else {
+        console.log(`[user:${userId}] ${text}`);
+      }
 
       const history = state.load(userId);
       const currentTodo = todo.read();
diff --git a/src/transcribe.js b/src/transcribe.js
@@ -0,0 +1,59 @@
+const { spawn } = require('child_process');
+const fs = require('fs');
+const path = require('path');
+const https = require('https');
+const http = require('http');
+
+const SCRIPT = path.join(__dirname, '..', 'scripts', 'transcribe.py');
+const TMP_DIR = path.join(__dirname, '..', 'data', 'tmp');
+
+function downloadFile(url, dest) {
+  return new Promise((resolve, reject) => {
+    fs.mkdirSync(path.dirname(dest), { recursive: true });
+    const file = fs.createWriteStream(dest);
+    const client = url.startsWith('https') ? https : http;
+    client.get(url, (res) => {
+      res.pipe(file);
+      file.on('finish', () => file.close(resolve));
+    }).on('error', (err) => {
+      fs.unlink(dest, () => {});
+      reject(err);
+    });
+  });
+}
+
+function runWhisper(audioPath) {
+  return new Promise((resolve, reject) => {
+    const env = { ...process.env };
+    const proc = spawn('python3', [SCRIPT, audioPath], { env });
+
+    let stdout = '';
+    let stderr = '';
+    proc.stdout.on('data', (d) => { stdout += d; });
+    proc.stderr.on('data', (d) => { stderr += d; });
+
+    proc.on('close', (code) => {
+      if (code !== 0) {
+        reject(new Error(`transcribe.py exited ${code}: ${stderr.trim()}`));
+      } else {
+        resolve(stdout.trim());
+      }
+    });
+  });
+}
+
+async function transcribeVoice(bot, fileId) {
+  fs.mkdirSync(TMP_DIR, { recursive: true });
+  const tmpPath = path.join(TMP_DIR, `${fileId}.ogg`);
+
+  try {
+    const fileUrl = await bot.getFileLink(fileId);
+    await downloadFile(fileUrl, tmpPath);
+    const text = await runWhisper(tmpPath);
+    return text;
+  } finally {
+    fs.unlink(tmpPath, () => {});
+  }
+}
+
+module.exports = { transcribeVoice };