From 22e8481e30d84d585a7400176870c86c73bd7972 Mon Sep 17 00:00:00 2001 From: jrosh Date: Mon, 2 Jun 2025 14:46:54 +0200 Subject: [PATCH] added config, chunkinkg and current playback text display --- src/main.rs | 645 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 499 insertions(+), 146 deletions(-) diff --git a/src/main.rs b/src/main.rs index f2ec4ae..fa86d03 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,39 @@ use eframe::egui; use std::io::{self, Read}; use std::process::{Command, Stdio}; -use std::fs; use std::path::PathBuf; use tempfile::TempDir; +use std::thread; +use std::sync::mpsc; -// TTS Configuration - make this configurable later -struct TtsConfig { +#[derive(Debug, Clone)] +struct ChunkingConfig { + min_chunk_size: usize, + max_chunk_size: usize, + min_sentences: usize, +} + +impl Default for ChunkingConfig { + fn default() -> Self { + Self { + min_chunk_size: 80, + max_chunk_size: 200, + min_sentences: 2, + } + } +} + +#[derive(Debug, Clone)] +struct KokoroConfig { exec_path: String, model_path: String, voice_data: String, speed: f32, voice_style: String, + chunking: ChunkingConfig, } -impl Default for TtsConfig { +impl Default for KokoroConfig { fn default() -> Self { let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string()); Self { @@ -23,24 +42,36 @@ impl Default for TtsConfig { voice_data: format!("{}/bin/kokoros/voices-v1.0.bin", home), speed: 1.1, voice_style: "af_heart".to_string(), + chunking: ChunkingConfig::default(), } } } +#[derive(Debug, Clone)] +enum ProcessingStatus { + Idle, + Chunking, + ProcessingChunk(usize, usize), // current, total + PlayingChunk(usize, usize), + Completed, + Error(String), +} + fn main() -> Result<(), eframe::Error> { let input_text = get_piped_text(); let options = eframe::NativeOptions { viewport: egui::ViewportBuilder::default() - .with_inner_size([600.0, 400.0]) + .with_inner_size([700.0, 400.0]) + .with_resizable(true) .with_decorations(false), ..Default::default() }; eframe::run_native( - "TTS Processor", + "Kokoro TTS Processor", options, - Box::new(move |_cc| Box::new(TtsApp::new(input_text))), + Box::new(move |_cc| Box::new(TtsUi::new(input_text))), ) } @@ -54,232 +85,554 @@ fn get_piped_text() -> String { } } - "No piped text provided.\n\nUsage: echo 'text' | ./tts_processor".to_string() + "No piped text provided.\n\nUsage: echo 'text' | ./ttsui\nOr: cat file.txt | ./ttsui".to_string() } -// Text cleaning function based on your bash script fn clean_text(text: &str) -> String { - text - // Remove line breaks with hyphens - .replace("-\r", "").replace("-\n", "") - // Replace line breaks with spaces - .replace('\r', " ").replace('\n', " ") - // Compress multiple spaces - .split_whitespace().collect::>().join(" ") - // Replace double dashes with em dash - .replace("--", " — ").replace(" - ", " — ") - // Replace ellipsis - .replace("...", "…") - // Remove commas in numbers (basic version) - .replace(",", "") - // Add space after punctuation if missing (basic version) - .chars() + text.lines() + .map(|line| line.trim()) + .filter(|line| !line.is_empty()) .collect::>() - .windows(2) - .map(|w| { - if matches!(w[0], '.' | ',' | ';' | ':') && w[1] != ' ' { - format!("{} {}", w[0], w[1]) - } else { - w[0].to_string() - } - }) - .collect::() - .trim() - .to_string() + .join(" ") + .replace("-\r", "").replace("-\n", "") + .replace('\r', " ").replace('\n', " ") + .split_whitespace().collect::>().join(" ") + .replace("--", " — ").replace(" - ", " — ") + .replace("...", "…") + .replace(",", "") + .trim().to_string() } -struct TtsApp { +fn smart_chunk_text(text: &str, config: &ChunkingConfig) -> Vec { + // Split text into sentences + let sentences: Vec<&str> = text + .split(|c| matches!(c, '.' | '!' | '?')) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect(); + + if sentences.is_empty() { + return vec![text.to_string()]; + } + + let mut chunks = Vec::new(); + let mut current_chunk = String::new(); + let mut sentence_count = 0; + + for sentence in sentences { + let sentence_with_punct = format!("{}.", sentence); + + // Create the potential new chunk + let potential_chunk = if current_chunk.is_empty() { + sentence_with_punct.clone() + } else { + format!("{} {}", current_chunk, sentence_with_punct) + }; + + // Check if we should output the current chunk before adding this sentence + let should_break = !current_chunk.is_empty() && ( + potential_chunk.len() > config.max_chunk_size || + (sentence_count >= config.min_sentences && + current_chunk.len() >= config.min_chunk_size && + potential_chunk.len() >= (config.min_chunk_size as f32 * 1.5) as usize) + ); + + if should_break { + // Save current chunk and start new one + chunks.push(current_chunk.trim().to_string()); + current_chunk = sentence_with_punct; + sentence_count = 1; + } else { + // Continue building current chunk + current_chunk = potential_chunk; + sentence_count += 1; + } + } + + // Add the final chunk + if !current_chunk.is_empty() { + chunks.push(current_chunk.trim().to_string()); + } + + // Fallback + if chunks.is_empty() { + chunks.push(text.to_string()); + } + + chunks +} + +struct TtsUi { original_text: String, cleaned_text: String, + chunks: Vec, status: String, + processing_status: ProcessingStatus, + currently_playing_chunk: Option, is_processing: bool, - config: TtsConfig, + config: KokoroConfig, temp_dir: Option, + window_loaded: bool, + should_auto_process: bool, + frames_since_load: u32, + status_receiver: Option>, } -impl TtsApp { +impl TtsUi { fn new(text: String) -> Self { let cleaned = clean_text(&text); + let has_text = !cleaned.trim().is_empty() && !cleaned.starts_with("No piped text"); + Self { original_text: text, cleaned_text: cleaned, - status: "Ready to process".to_string(), + chunks: Vec::new(), + status: if has_text { "Window loading..." } else { "Waiting for text input" }.to_string(), + processing_status: ProcessingStatus::Idle, + currently_playing_chunk: None, is_processing: false, - config: TtsConfig::default(), + config: KokoroConfig::default(), temp_dir: None, + window_loaded: false, + should_auto_process: has_text, + frames_since_load: 0, + status_receiver: None, } } fn start_tts_processing(&mut self) { - if self.is_processing { + if self.is_processing || self.cleaned_text.trim().is_empty() { return; } self.is_processing = true; - self.status = "Starting TTS processing...".to_string(); + self.processing_status = ProcessingStatus::Chunking; + self.status = "Chunking text...".to_string(); + + // Create chunks + self.chunks = smart_chunk_text(&self.cleaned_text, &self.config.chunking); + self.status = format!("Created {} chunks", self.chunks.len()); - // Create temporary directory match TempDir::new() { Ok(temp_dir) => { self.temp_dir = Some(temp_dir); - self.process_text(); + self.process_chunks(); } Err(e) => { self.status = format!("Error creating temp directory: {}", e); + self.processing_status = ProcessingStatus::Error(e.to_string()); self.is_processing = false; } } } - fn process_text(&mut self) { + fn process_chunks(&mut self) { if let Some(ref temp_dir) = self.temp_dir { - let temp_path = temp_dir.path(); - let audio_file = temp_path.join("output.wav"); + let temp_path = temp_dir.path().to_path_buf(); + let chunks = self.chunks.clone(); + let config = self.config.clone(); - self.status = "Generating audio...".to_string(); + // Create a channel for status updates + let (sender, receiver) = mpsc::channel(); + self.status_receiver = Some(receiver); - // Run TTS command - let result = Command::new(&self.config.exec_path) - .arg("--model").arg(&self.config.model_path) - .arg("--data").arg(&self.config.voice_data) - .arg("--speed").arg(self.config.speed.to_string()) - .arg("--style").arg(&self.config.voice_style) - .arg("text").arg(&self.cleaned_text) + // Process chunks in background thread + thread::spawn(move || { + Self::process_chunks_background(temp_path, chunks, config, sender); + }); + + self.status = "Starting chunk processing...".to_string(); + } + } + + fn process_chunks_background( + temp_path: PathBuf, + chunks: Vec, + config: KokoroConfig, + sender: mpsc::Sender + ) { + let total_chunks = chunks.len(); + + // Channel to signal when new audio is ready + let (audio_sender, audio_receiver) = mpsc::channel::(); + + // Spawn audio player thread + let player_sender = sender.clone(); + thread::spawn(move || { + let mut chunk_num = 1; + + // Play audio files as they become available + while let Ok(audio_file) = audio_receiver.recv() { + let _ = player_sender.send(ProcessingStatus::PlayingChunk(chunk_num, total_chunks)); + Self::play_audio_sync(&audio_file); + chunk_num += 1; + + // If this was the last chunk, we're done + if chunk_num > total_chunks { + let _ = player_sender.send(ProcessingStatus::Completed); + break; + } + } + }); + + // Process each chunk and send to player as soon as ready + for (i, chunk) in chunks.iter().enumerate() { + let chunk_num = i + 1; + + // Send status update + let _ = sender.send(ProcessingStatus::ProcessingChunk(chunk_num, total_chunks)); + + let audio_file = temp_path.join(format!("chunk_{}.wav", i)); + + println!("Processing chunk {}/{}: {}...", chunk_num, total_chunks, + &chunk.chars().take(40).collect::()); + + let result = Command::new(&config.exec_path) + .arg("--model").arg(&config.model_path) + .arg("--data").arg(&config.voice_data) + .arg("--speed").arg(config.speed.to_string()) + .arg("--style").arg(&config.voice_style) + .arg("text").arg(chunk) .arg("--output").arg(&audio_file) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) .output(); match result { Ok(output) => { if output.status.success() && audio_file.exists() { - self.status = "Audio generated, starting playback...".to_string(); - self.play_audio(&audio_file); + // Send this audio file to the player immediately + if audio_sender.send(audio_file).is_err() { + eprintln!("Failed to send audio file to player"); + let _ = sender.send(ProcessingStatus::Error( + "Audio player thread disconnected".to_string() + )); + return; + } } else { - let error_msg = String::from_utf8_lossy(&output.stderr); - self.status = format!("TTS failed: {}", error_msg); - self.is_processing = false; + eprintln!("Kokoro failed for chunk {}: {}", chunk_num, + String::from_utf8_lossy(&output.stderr)); + let _ = sender.send(ProcessingStatus::Error( + format!("Failed to process chunk {}", chunk_num) + )); + return; } } Err(e) => { - self.status = format!("Failed to run TTS: {}", e); - self.is_processing = false; + eprintln!("Failed to run Kokoro for chunk {}: {}", chunk_num, e); + let _ = sender.send(ProcessingStatus::Error( + format!("Failed to run Kokoro for chunk {}: {}", chunk_num, e) + )); + return; } } } + + drop(audio_sender); + + println!("✅ All chunks processed!"); } - fn play_audio(&mut self, audio_file: &PathBuf) { - // Try different audio players - let players = ["aplay", "paplay", "play", "ffplay"]; + fn play_audio_sync(audio_file: &PathBuf) { + let players = ["aplay", "paplay", "play", "ffplay", "mpg123"]; for player in &players { let result = Command::new(player) .arg(audio_file) .stdout(Stdio::null()) - .stderr(Stdio::piped()) + .stderr(Stdio::null()) .spawn(); match result { Ok(mut child) => { - self.status = format!("Playing audio with {}...", player); - - // Wait for playback to complete + println!("Playing audio with {}...", player); match child.wait() { Ok(exit_status) => { if exit_status.success() { - self.status = "Playback completed!".to_string(); - } else { - self.status = format!("Playback failed with {}", player); + println!("✅ Chunk played successfully!"); + return; } } Err(e) => { - self.status = format!("Error waiting for playback: {}", e); + eprintln!("Error during playback: {}", e); } } - self.is_processing = false; - return; } - Err(_) => { - // Try next player - continue; + Err(_) => continue, + } + } + + eprintln!("❌ No audio player found"); + } +} + +impl eframe::App for TtsUi { + fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) { + // Check for status updates from background processing + if let Some(ref receiver) = self.status_receiver { + if let Ok(new_status) = receiver.try_recv() { + match &new_status { + ProcessingStatus::ProcessingChunk(current, total) => { + self.status = format!("Processing chunk {}/{}", current, total); + } + ProcessingStatus::PlayingChunk(current, total) => { + self.status = format!("Playing chunk {}/{}", current, total); + self.currently_playing_chunk = Some(*current); + } + ProcessingStatus::Completed => { + self.status = "✅ All chunks completed successfully!".to_string(); + self.currently_playing_chunk = None; + self.is_processing = false; + } + ProcessingStatus::Error(err) => { + self.status = format!("❌ Error: {}", err); + self.currently_playing_chunk = None; + self.is_processing = false; + } + _ => {} + } + self.processing_status = new_status; + } + } + + if !self.window_loaded { + self.frames_since_load += 1; + + if self.frames_since_load > 5 { + self.window_loaded = true; + self.status = "Window loaded!".to_string(); + + // Start auto-processing if provided text + if self.should_auto_process { + self.start_tts_processing(); } } } - self.status = "No audio player found (tried: aplay, paplay, play, ffplay)".to_string(); - self.is_processing = false; - } -} - -impl eframe::App for TtsApp { - fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) { egui::CentralPanel::default().show(ctx, |ui| { - ui.vertical(|ui| { - // Title and status - ui.heading("🔊 TTS Processor"); - ui.label(format!("Status: {}", self.status)); - ui.separator(); - ui.add_space(10.0); - - // Control buttons - ui.horizontal(|ui| { - if ui.button("🎵 Start TTS Processing").clicked() && !self.is_processing { - self.start_tts_processing(); - } - - ui.separator(); - - if ui.button("❌ Exit").clicked() { - ctx.send_viewport_cmd(egui::ViewportCommand::Close); - } - }); - - ui.add_space(10.0); - ui.separator(); - ui.add_space(10.0); - - // Text display - ui.label("📝 Original Text:"); - egui::ScrollArea::vertical() - .max_height(100.0) - .show(ui, |ui| { - ui.add( - egui::TextEdit::multiline(&mut self.original_text.as_str()) - .desired_width(f32::INFINITY) - .interactive(false) - ); + egui::ScrollArea::vertical() + .auto_shrink([false; 2]) + .show(ui, |ui| { + ui.vertical(|ui| { + ui.horizontal(|ui| { + ui.label("Status:"); + if self.is_processing { + ui.spinner(); + } + ui.label(&self.status); + }); + ui.separator(); + ui.add_space(10.0); + + + // Current audio display + ui.horizontal(|ui| { + ui.label("🎵 Currently Playing:"); + if let Some(playing_chunk) = self.currently_playing_chunk { + ui.label(format!("Chunk {}/{}", playing_chunk, self.chunks.len())); + } else { + match &self.processing_status { + ProcessingStatus::Completed => { + ui.label("Playback completed"); + } + ProcessingStatus::Error(_) => { + ui.label("Error occurred"); + } + ProcessingStatus::ProcessingChunk(_, _) => { + ui.label("Preparing audio..."); + } + _ => { + ui.label("Ready to start"); + } + } + } + }); + + // Show the chunk that is currently being played + if let Some(playing_chunk) = self.currently_playing_chunk { + if let Some(playing_text) = self.chunks.get(playing_chunk - 1) { + egui::ScrollArea::vertical() + .max_height(120.0) + .show(ui, |ui| { + ui.add( + egui::TextEdit::multiline(&mut playing_text.as_str()) + .desired_width(f32::INFINITY) + .desired_rows(1) + .interactive(false) + ); + }); + } + } else { + match &self.processing_status { + ProcessingStatus::Completed => { + ui.label("🎉 All audio chunks have been played successfully!"); + } + ProcessingStatus::Error(err) => { + ui.colored_label(egui::Color32::RED, format!("❌ Error: {}", err)); + } + ProcessingStatus::ProcessingChunk(current, total) => { + ui.label(format!("⏳ Processing chunk {}/{}... Audio will start soon.", current, total)); + } + _ => { + // When not playing anything, show the input text for editing + if !self.is_processing && self.chunks.is_empty() { + egui::ScrollArea::vertical() + .max_height(120.0) + .show(ui, |ui| { + ui.add( + egui::TextEdit::multiline(&mut self.cleaned_text) + .desired_width(f32::INFINITY) + .desired_rows(5) + .interactive(!self.is_processing) + ); + }); + } else { + ui.label("⏳ Waiting for audio playback to begin..."); + } + } + } + } + + ui.add_space(10.0); + ui.separator(); + ui.add_space(5.0); + + + // Collapsible text input section (moved above current audio) + ui.collapsing("📝 Text Input", |ui| { + ui.horizontal(|ui| { + ui.label("Text to process:"); + ui.label(format!("({} characters)", self.cleaned_text.len())); + if !self.chunks.is_empty() { + ui.label(format!("- {} chunks", self.chunks.len())); + } + }); + + egui::ScrollArea::vertical() + .max_height(150.0) + .show(ui, |ui| { + ui.add( + egui::TextEdit::multiline(&mut self.cleaned_text) + .desired_width(f32::INFINITY) + .desired_rows(8) + .interactive(!self.is_processing) + ); + }); + + // Show all chunks if available + if !self.chunks.is_empty() { + ui.add_space(5.0); + ui.label("📄 All Chunks:"); + egui::ScrollArea::vertical() + .max_height(100.0) + .show(ui, |ui| { + for (i, chunk) in self.chunks.iter().enumerate() { + let chunk_preview = if chunk.len() > 80 { + format!("{}...", &chunk.chars().take(80).collect::()) + } else { + chunk.clone() + }; + + // Highlight current chunk being processed or played + let is_current = match &self.processing_status { + ProcessingStatus::ProcessingChunk(current, _) => *current == i + 1, + ProcessingStatus::PlayingChunk(current, _) => *current == i + 1, + _ => false, + }; + + if is_current { + ui.colored_label(egui::Color32::GREEN, format!("▶ {}: {}", i + 1, chunk_preview)); + } else { + ui.label(format!("{}: {}", i + 1, chunk_preview)); + } + } + }); + } + }); + + ui.add_space(10.0); + + // Controls + ui.horizontal(|ui| { + if ui.button("🎵 Process with Kokoro").clicked() && !self.is_processing { + self.start_tts_processing(); + } + + ui.separator(); + + if ui.button("❌ Exit").clicked() { + ctx.send_viewport_cmd(egui::ViewportCommand::Close); + } + }); + + ui.add_space(10.0); + ui.separator(); + ui.add_space(5.0); + + + + // Configuration panel + ui.collapsing("⚙ Configuration", |ui| { + ui.horizontal(|ui| { + ui.label("Speed:"); + ui.add(egui::Slider::new(&mut self.config.speed, 0.5..=2.0).step_by(0.1)); + }); + + ui.horizontal(|ui| { + ui.label("Voice Style:"); + ui.text_edit_singleline(&mut self.config.voice_style); + }); + + ui.separator(); + ui.label("Chunking Settings:"); + + ui.horizontal(|ui| { + ui.label("Min chunk size:"); + ui.add(egui::DragValue::new(&mut self.config.chunking.min_chunk_size).clamp_range(20..=500)); + }); + + ui.horizontal(|ui| { + ui.label("Max chunk size:"); + ui.add(egui::DragValue::new(&mut self.config.chunking.max_chunk_size).clamp_range(50..=1000)); + }); + + ui.horizontal(|ui| { + ui.label("Min sentences:"); + ui.add(egui::DragValue::new(&mut self.config.chunking.min_sentences).clamp_range(1..=10)); + }); + + ui.separator(); + ui.label("Paths:"); + + ui.horizontal(|ui| { + ui.label("Executable:"); + ui.text_edit_singleline(&mut self.config.exec_path); + }); + + ui.horizontal(|ui| { + ui.label("Model Path:"); + ui.text_edit_singleline(&mut self.config.model_path); + }); + + ui.horizontal(|ui| { + ui.label("Voice Data:"); + ui.text_edit_singleline(&mut self.config.voice_data); + }); + }); + + ui.add_space(10.0); + ui.separator(); + ui.add_space(5.0); }); - - ui.add_space(10.0); - - ui.label("✨ Cleaned Text (will be processed):"); - egui::ScrollArea::vertical() - .max_height(120.0) - .show(ui, |ui| { - ui.add( - egui::TextEdit::multiline(&mut self.cleaned_text.as_str()) - .desired_width(f32::INFINITY) - .interactive(false) - ); - }); - - ui.add_space(10.0); - - // Configuration display - ui.collapsing("⚙️ TTS Configuration", |ui| { - ui.label(format!("Executable: {}", self.config.exec_path)); - ui.label(format!("Model: {}", self.config.model_path)); - ui.label(format!("Voice Data: {}", self.config.voice_data)); - ui.label(format!("Speed: {}", self.config.speed)); - ui.label(format!("Voice Style: {}", self.config.voice_style)); }); - }); }); // Close on Escape if ctx.input(|i| i.key_pressed(egui::Key::Escape)) { ctx.send_viewport_cmd(egui::ViewportCommand::Close); } + + // Request repaint to keep checking window load status + if !self.window_loaded { + ctx.request_repaint(); + } } -} +} \ No newline at end of file