added config, chunking and current playback text display

2025-06-02 14:46:54 +02:00 · 2025-06-02 14:46:54 +02:00 · 22e8481e30
commit 22e8481e30
parent 4a70c57c8b
1 changed files with 489 additions and 136 deletions
--- a/src/main.rs
+++ b/src/main.rs
@ -1,20 +1,39 @@
 use eframe::egui;
 use std::io::{self, Read};
 use std::process::{Command, Stdio};
 use std::fs;
 use std::path::PathBuf;
 use tempfile::TempDir;
 use std::thread;
 use std::sync::mpsc;
-// TTS Configuration - make this configurable later
+#[derive(Debug, Clone)]
-struct TtsConfig {
+struct ChunkingConfig {
    min_chunk_size: usize,
    max_chunk_size: usize,
    min_sentences: usize,
 }
 impl Default for ChunkingConfig {
    fn default() -> Self {
        Self {
            min_chunk_size: 80,
            max_chunk_size: 200,
            min_sentences: 2,
        }
    }
 }
 // Settings for one Kokoro TTS run: where the tool and its data live, how the
 // voice should sound, and how the input text is split before synthesis.
 #[derive(Debug, Clone)]
 struct KokoroConfig {
    // Program launched once per chunk to synthesize audio.
    exec_path: String,
    // Passed to the program as the `--model` argument.
    model_path: String,
    // Passed to the program as the `--data` argument.
    voice_data: String,
    // Passed to the program as the `--speed` argument.
    speed: f32,
    // Passed to the program as the `--style` argument.
    voice_style: String,
    // Limits handed to `smart_chunk_text` when splitting the cleaned text.
    chunking: ChunkingConfig,
 }
-impl Default for TtsConfig {
+impl Default for KokoroConfig {
    fn default() -> Self {
        let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
        Self {
@ -23,24 +42,36 @@ impl Default for TtsConfig {
            voice_data: format!("{}/bin/kokoros/voices-v1.0.bin", home),
            speed: 1.1,
            voice_style: "af_heart".to_string(),
            chunking: ChunkingConfig::default(),
        }
    }
 }
 // Progress messages describing the TTS pipeline. The background worker and
 // the audio player thread send these over an mpsc channel; the UI polls the
 // channel each frame and mirrors the latest value.
 #[derive(Debug, Clone)]
 enum ProcessingStatus {
    // Nothing has been started yet (the initial state of the UI).
    Idle,
    // The cleaned text is being split into chunks.
    Chunking,
    // Kokoro is about to synthesize this chunk. Chunk numbers are 1-based.
    ProcessingChunk(usize, usize), // current, total
    // The player thread is about to play this chunk (1-based current, total).
    PlayingChunk(usize, usize),
    // The last chunk has finished playing.
    Completed,
    // Processing stopped; the string is the message shown to the user.
    Error(String),
 }
 fn main() -> Result<(), eframe::Error> {
    let input_text = get_piped_text();
    let options = eframe::NativeOptions {
        viewport: egui::ViewportBuilder::default()
-            .with_inner_size([600.0, 400.0])
+            .with_inner_size([700.0, 400.0])
            .with_resizable(true)
            .with_decorations(false),
        ..Default::default()
    };
    eframe::run_native(
-        "TTS Processor",
+        "Kokoro TTS Processor",
        options,
-        Box::new(move |_cc| Box::new(TtsApp::new(input_text))),
+        Box::new(move |_cc| Box::new(TtsUi::new(input_text))),
    )
 }
@ -54,177 +85,474 @@ fn get_piped_text() -> String {
        }
    }
-    "No piped text provided.\n\nUsage: echo 'text' | ./tts_processor".to_string()
+    "No piped text provided.\n\nUsage: echo 'text' | ./ttsui\nOr: cat file.txt | ./ttsui".to_string()
 }
 // Text cleaning function based on your bash script
 fn clean_text(text: &str) -> String {
-    text
+    text.lines()
-        // Remove line breaks with hyphens
+        .map(|line| line.trim())
-        .replace("-\r", "").replace("-\n", "")
+        .filter(|line| !line.is_empty())
        // Replace line breaks with spaces
        .replace('\r', " ").replace('\n', " ")
        // Compress multiple spaces
        .split_whitespace().collect::<Vec<_>>().join(" ")
        // Replace double dashes with em dash
        .replace("--", " — ").replace(" - ", " — ")
        // Replace ellipsis
        .replace("...", "…")
        // Remove commas in numbers (basic version)
        .replace(",", "")
        // Add space after punctuation if missing (basic version)
        .chars()
        .collect::<Vec<_>>()
-        .windows(2)
+        .join(" ")
-        .map(|w| {
+        .replace("-\r", "").replace("-\n", "")
-            if matches!(w[0], '.' | ',' | ';' | ':') && w[1] != ' ' {
+        .replace('\r', " ").replace('\n', " ")
-                format!("{} {}", w[0], w[1])
+        .split_whitespace().collect::<Vec<_>>().join(" ")
-            } else {
+        .replace("--", " — ").replace(" - ", " — ")
-                w[0].to_string()
+        .replace("...", "…")
-            }
+        .replace(",", "")
-        })
+        .trim().to_string()
        .collect::<String>()
        .trim()
        .to_string()
 }
-struct TtsApp {
+fn smart_chunk_text(text: &str, config: &ChunkingConfig) -> Vec<String> {
    // Split text into sentences
    let sentences: Vec<&str> = text
        .split(|c| matches!(c, '.' | '!' | '?'))
        .map(|s| s.trim())
        .filter(|s| !s.is_empty())
        .collect();
    if sentences.is_empty() {
        return vec![text.to_string()];
    }
    let mut chunks = Vec::new();
    let mut current_chunk = String::new();
    let mut sentence_count = 0;
    for sentence in sentences {
        let sentence_with_punct = format!("{}.", sentence);
        // Create the potential new chunk
        let potential_chunk = if current_chunk.is_empty() {
            sentence_with_punct.clone()
        } else {
            format!("{} {}", current_chunk, sentence_with_punct)
        };
        // Check if we should output the current chunk before adding this sentence
        let should_break = !current_chunk.is_empty() && (
            potential_chunk.len() > config.max_chunk_size ||
            (sentence_count >= config.min_sentences && 
             current_chunk.len() >= config.min_chunk_size &&
             potential_chunk.len() >= (config.min_chunk_size as f32 * 1.5) as usize)
        );
        if should_break {
            // Save current chunk and start new one
            chunks.push(current_chunk.trim().to_string());
            current_chunk = sentence_with_punct;
            sentence_count = 1;
        } else {
            // Continue building current chunk
            current_chunk = potential_chunk;
            sentence_count += 1;
        }
    }
    // Add the final chunk
    if !current_chunk.is_empty() {
        chunks.push(current_chunk.trim().to_string());
    }
    // Fallback
    if chunks.is_empty() {
        chunks.push(text.to_string());
    }
    chunks
 }
 struct TtsUi {
    original_text: String,
    cleaned_text: String,
    chunks: Vec<String>,
    status: String,
    processing_status: ProcessingStatus,
    currently_playing_chunk: Option<usize>,
    is_processing: bool,
-    config: TtsConfig,
+    config: KokoroConfig,
    temp_dir: Option<TempDir>,
    window_loaded: bool,
    should_auto_process: bool,
    frames_since_load: u32,
    status_receiver: Option<mpsc::Receiver<ProcessingStatus>>,
 }
-impl TtsApp {
+impl TtsUi {
    fn new(text: String) -> Self {
        let cleaned = clean_text(&text);
        let has_text = !cleaned.trim().is_empty() && !cleaned.starts_with("No piped text");
        Self {
            original_text: text,
            cleaned_text: cleaned,
-            status: "Ready to process".to_string(),
+            chunks: Vec::new(),
            status: if has_text { "Window loading..." } else { "Waiting for text input" }.to_string(),
            processing_status: ProcessingStatus::Idle,
            currently_playing_chunk: None,
            is_processing: false,
-            config: TtsConfig::default(),
+            config: KokoroConfig::default(),
            temp_dir: None,
            window_loaded: false,
            should_auto_process: has_text,
            frames_since_load: 0,
            status_receiver: None,
        }
    }
    fn start_tts_processing(&mut self) {
-        if self.is_processing {
+        if self.is_processing || self.cleaned_text.trim().is_empty() {
            return;
        }
        self.is_processing = true;
-        self.status = "Starting TTS processing...".to_string();
+        self.processing_status = ProcessingStatus::Chunking;
        self.status = "Chunking text...".to_string();
        // Create chunks
        self.chunks = smart_chunk_text(&self.cleaned_text, &self.config.chunking);
        self.status = format!("Created {} chunks", self.chunks.len());
        // Create temporary directory
        match TempDir::new() {
            Ok(temp_dir) => {
                self.temp_dir = Some(temp_dir);
-                self.process_text();
+                self.process_chunks();
            }
            Err(e) => {
                self.status = format!("Error creating temp directory: {}", e);
                self.processing_status = ProcessingStatus::Error(e.to_string());
                self.is_processing = false;
            }
        }
    }
-    fn process_text(&mut self) {
+    fn process_chunks(&mut self) {
        if let Some(ref temp_dir) = self.temp_dir {
-            let temp_path = temp_dir.path();
+            let temp_path = temp_dir.path().to_path_buf();
-            let audio_file = temp_path.join("output.wav");
+            let chunks = self.chunks.clone();
            let config = self.config.clone();
-            self.status = "Generating audio...".to_string();
+            // Create a channel for status updates
            let (sender, receiver) = mpsc::channel();
            self.status_receiver = Some(receiver);
-            // Run TTS command
+            // Process chunks in background thread
-            let result = Command::new(&self.config.exec_path)
+            thread::spawn(move || {
-                .arg("--model").arg(&self.config.model_path)
+                Self::process_chunks_background(temp_path, chunks, config, sender);
-                .arg("--data").arg(&self.config.voice_data)
+            });
-                .arg("--speed").arg(self.config.speed.to_string())
+            
-                .arg("--style").arg(&self.config.voice_style)
+            self.status = "Starting chunk processing...".to_string();
-                .arg("text").arg(&self.cleaned_text)
+        }
    }
    fn process_chunks_background(
        temp_path: PathBuf, 
        chunks: Vec<String>, 
        config: KokoroConfig,
        sender: mpsc::Sender<ProcessingStatus>
    ) {
        let total_chunks = chunks.len();
        // Channel to signal when new audio is ready
        let (audio_sender, audio_receiver) = mpsc::channel::<PathBuf>();
        // Spawn audio player thread
        let player_sender = sender.clone();
        thread::spawn(move || {
            let mut chunk_num = 1;
            // Play audio files as they become available
            while let Ok(audio_file) = audio_receiver.recv() {
                let _ = player_sender.send(ProcessingStatus::PlayingChunk(chunk_num, total_chunks));
                Self::play_audio_sync(&audio_file);
                chunk_num += 1;
                // If this was the last chunk, we're done
                if chunk_num > total_chunks {
                    let _ = player_sender.send(ProcessingStatus::Completed);
                    break;
                }
            }
        });
        // Process each chunk and send to player as soon as ready
        for (i, chunk) in chunks.iter().enumerate() {
            let chunk_num = i + 1;
            // Send status update
            let _ = sender.send(ProcessingStatus::ProcessingChunk(chunk_num, total_chunks));
            let audio_file = temp_path.join(format!("chunk_{}.wav", i));
            println!("Processing chunk {}/{}: {}...", chunk_num, total_chunks, 
                    &chunk.chars().take(40).collect::<String>());
            let result = Command::new(&config.exec_path)
                .arg("--model").arg(&config.model_path)
                .arg("--data").arg(&config.voice_data)
                .arg("--speed").arg(config.speed.to_string())
                .arg("--style").arg(&config.voice_style)
                .arg("text").arg(chunk)
                .arg("--output").arg(&audio_file)
                .stdout(Stdio::piped())
                .stderr(Stdio::piped())
                .output();
            match result {
                Ok(output) => {
                    if output.status.success() && audio_file.exists() {
-                        self.status = "Audio generated, starting playback...".to_string();
+                        // Send this audio file to the player immediately
-                        self.play_audio(&audio_file);
+                        if audio_sender.send(audio_file).is_err() {
                            eprintln!("Failed to send audio file to player");
                            let _ = sender.send(ProcessingStatus::Error(
                                "Audio player thread disconnected".to_string()
                            ));
                            return;
                        }
                    } else {
-                        let error_msg = String::from_utf8_lossy(&output.stderr);
+                        eprintln!("Kokoro failed for chunk {}: {}", chunk_num, 
-                        self.status = format!("TTS failed: {}", error_msg);
+                                String::from_utf8_lossy(&output.stderr));
-                        self.is_processing = false;
+                        let _ = sender.send(ProcessingStatus::Error(
                            format!("Failed to process chunk {}", chunk_num)
                        ));
                        return;
                    }
                }
                Err(e) => {
-                    self.status = format!("Failed to run TTS: {}", e);
+                    eprintln!("Failed to run Kokoro for chunk {}: {}", chunk_num, e);
-                    self.is_processing = false;
+                    let _ = sender.send(ProcessingStatus::Error(
-                }
+                        format!("Failed to run Kokoro for chunk {}: {}", chunk_num, e)
                    ));
                    return;
                }
            }
        }
-    fn play_audio(&mut self, audio_file: &PathBuf) {
+        drop(audio_sender);
-        // Try different audio players
+        
-        let players = ["aplay", "paplay", "play", "ffplay"];
+        println!("✅ All chunks processed!");
    }
    fn play_audio_sync(audio_file: &PathBuf) {
        let players = ["aplay", "paplay", "play", "ffplay", "mpg123"];
        for player in &players {
            let result = Command::new(player)
                .arg(audio_file)
                .stdout(Stdio::null())
-                .stderr(Stdio::piped())
+                .stderr(Stdio::null())
                .spawn();
            match result {
                Ok(mut child) => {
-                    self.status = format!("Playing audio with {}...", player);
+                    println!("Playing audio with {}...", player);
                    // Wait for playback to complete
                    match child.wait() {
                        Ok(exit_status) => {
                            if exit_status.success() {
-                                self.status = "Playback completed!".to_string();
+                                println!("✅ Chunk played successfully!");
-                            } else {
+                                return;
                                self.status = format!("Playback failed with {}", player);
                            }
                        }
                        Err(e) => {
-                            self.status = format!("Error waiting for playback: {}", e);
+                            eprintln!("Error during playback: {}", e);
                        }
                    }
                    self.is_processing = false;
                    return;
                }
                Err(_) => {
                    // Try next player
                    continue;
                        }
                    }
                }
-        
+                Err(_) => continue,
        self.status = "No audio player found (tried: aplay, paplay, play, ffplay)".to_string();
        self.is_processing = false;
            }
        }
-impl eframe::App for TtsApp {
+        eprintln!("❌ No audio player found");
    }
 }
 impl eframe::App for TtsUi {
    fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) {
        // Check for status updates from background processing
        if let Some(ref receiver) = self.status_receiver {
            if let Ok(new_status) = receiver.try_recv() {
                match &new_status {
                    ProcessingStatus::ProcessingChunk(current, total) => {
                        self.status = format!("Processing chunk {}/{}", current, total);
                    }
                    ProcessingStatus::PlayingChunk(current, total) => {
                        self.status = format!("Playing chunk {}/{}", current, total);
                        self.currently_playing_chunk = Some(*current); 
                    }
                    ProcessingStatus::Completed => {
                        self.status = "✅ All chunks completed successfully!".to_string();
                        self.currently_playing_chunk = None;
                        self.is_processing = false;
                    }
                    ProcessingStatus::Error(err) => {
                        self.status = format!("❌ Error: {}", err);
                        self.currently_playing_chunk = None;
                        self.is_processing = false;
                    }
                    _ => {}
                }
                self.processing_status = new_status;
            }
        }
        if !self.window_loaded {
            self.frames_since_load += 1;
            if self.frames_since_load > 5 {
                self.window_loaded = true;
                self.status = "Window loaded!".to_string();
                // Start auto-processing if provided text
                if self.should_auto_process {
                    self.start_tts_processing();
                }
            }
        }
        egui::CentralPanel::default().show(ctx, |ui| {
            egui::ScrollArea::vertical()
                .auto_shrink([false; 2])
                .show(ui, |ui| {
                    ui.vertical(|ui| {
-                // Title and status
+                        ui.horizontal(|ui| {
-                ui.heading("🔊 TTS Processor");
+                            ui.label("Status:");
-                ui.label(format!("Status: {}", self.status));
+                            if self.is_processing {
                                ui.spinner();
                            }
                            ui.label(&self.status);
                        });
                        ui.separator();
                        ui.add_space(10.0);
-                // Control buttons
+                                                
                        // Current audio display
                        ui.horizontal(|ui| {
-                    if ui.button("🎵 Start TTS Processing").clicked() && !self.is_processing {
+                            ui.label("🎵 Currently Playing:");
                            if let Some(playing_chunk) = self.currently_playing_chunk {
                                ui.label(format!("Chunk {}/{}", playing_chunk, self.chunks.len()));
                            } else {
                                match &self.processing_status {
                                    ProcessingStatus::Completed => {
                                        ui.label("Playback completed");
                                    }
                                    ProcessingStatus::Error(_) => {
                                        ui.label("Error occurred");
                                    }
                                    ProcessingStatus::ProcessingChunk(_, _) => {
                                        ui.label("Preparing audio...");
                                    }
                                    _ => {
                                        ui.label("Ready to start");
                                    }
                                }
                            }
                        });
                        // Show the chunk that is currently being played
                        if let Some(playing_chunk) = self.currently_playing_chunk {
                            if let Some(playing_text) = self.chunks.get(playing_chunk - 1) {
                                egui::ScrollArea::vertical()
                                    .max_height(120.0)
                                    .show(ui, |ui| {
                                        ui.add(
                                            egui::TextEdit::multiline(&mut playing_text.as_str())
                                                .desired_width(f32::INFINITY)
                                                .desired_rows(1)
                                                .interactive(false)
                                        );
                                    });
                            }
                        } else {
                            match &self.processing_status {
                                ProcessingStatus::Completed => {
                                    ui.label("🎉 All audio chunks have been played successfully!");
                                }
                                ProcessingStatus::Error(err) => {
                                    ui.colored_label(egui::Color32::RED, format!("❌ Error: {}", err));
                                }
                                ProcessingStatus::ProcessingChunk(current, total) => {
                                    ui.label(format!("⏳ Processing chunk {}/{}... Audio will start soon.", current, total));
                                }
                                _ => {
                                    // When not playing anything, show the input text for editing
                                    if !self.is_processing && self.chunks.is_empty() {
                                        egui::ScrollArea::vertical()
                                            .max_height(120.0)
                                            .show(ui, |ui| {
                                                ui.add(
                                                    egui::TextEdit::multiline(&mut self.cleaned_text)
                                                        .desired_width(f32::INFINITY)
                                                        .desired_rows(5)
                                                        .interactive(!self.is_processing)
                                                );
                                            });
                                    } else {
                                        ui.label("⏳ Waiting for audio playback to begin...");
                                    }
                                }
                            }
                        }
                        ui.add_space(10.0);
                        ui.separator();
                        ui.add_space(5.0);
                        // Collapsible text input section (moved above current audio)
                        ui.collapsing("📝 Text Input", |ui| {
                            ui.horizontal(|ui| {
                                ui.label("Text to process:");
                                ui.label(format!("({} characters)", self.cleaned_text.len()));
                                if !self.chunks.is_empty() {
                                    ui.label(format!("- {} chunks", self.chunks.len()));
                                }
                            });
                            egui::ScrollArea::vertical()
                                .max_height(150.0)
                                .show(ui, |ui| {
                                    ui.add(
                                        egui::TextEdit::multiline(&mut self.cleaned_text)
                                            .desired_width(f32::INFINITY)
                                            .desired_rows(8)
                                            .interactive(!self.is_processing)
                                    );
                                });
                            // Show all chunks if available
                            if !self.chunks.is_empty() {
                                ui.add_space(5.0);
                                ui.label("📄 All Chunks:");
                                egui::ScrollArea::vertical()
                                    .max_height(100.0)
                                    .show(ui, |ui| {
                                        for (i, chunk) in self.chunks.iter().enumerate() {
                                            let chunk_preview = if chunk.len() > 80 {
                                                format!("{}...", &chunk.chars().take(80).collect::<String>())
                                            } else {
                                                chunk.clone()
                                            };
                                            // Highlight current chunk being processed or played
                                            let is_current = match &self.processing_status {
                                                ProcessingStatus::ProcessingChunk(current, _) => *current == i + 1,
                                                ProcessingStatus::PlayingChunk(current, _) => *current == i + 1,
                                                _ => false,
                                            };
                                            if is_current {
                                                ui.colored_label(egui::Color32::GREEN, format!("▶ {}: {}", i + 1, chunk_preview));
                                            } else {
                                                ui.label(format!("{}: {}", i + 1, chunk_preview));
                                            }
                                        }
                                    });
                            }
                        });
                        ui.add_space(10.0);
                        // Controls
                        ui.horizontal(|ui| {
                            if ui.button("🎵 Process with Kokoro").clicked() && !self.is_processing {
                                self.start_tts_processing();
                            }
@ -237,42 +565,62 @@ impl eframe::App for TtsApp {
                        ui.add_space(10.0);
                        ui.separator();
-                ui.add_space(10.0);
+                        ui.add_space(5.0);
-                // Text display
+
-                ui.label("📝 Original Text:");
+
-                egui::ScrollArea::vertical()
+                        // Configuration panel
-                    .max_height(100.0)
+                        ui.collapsing("⚙ Configuration", |ui| {
-                    .show(ui, |ui| {
+                            ui.horizontal(|ui| {
-                        ui.add(
+                                ui.label("Speed:");
-                            egui::TextEdit::multiline(&mut self.original_text.as_str())
+                                ui.add(egui::Slider::new(&mut self.config.speed, 0.5..=2.0).step_by(0.1));
-                                .desired_width(f32::INFINITY)
+                            });
-                                .interactive(false)
+                            
-                        );
+                            ui.horizontal(|ui| {
                                ui.label("Voice Style:");
                                ui.text_edit_singleline(&mut self.config.voice_style);
                            });
                            ui.separator();
                            ui.label("Chunking Settings:");
                            ui.horizontal(|ui| {
                                ui.label("Min chunk size:");
                                ui.add(egui::DragValue::new(&mut self.config.chunking.min_chunk_size).clamp_range(20..=500));
                            });
                            ui.horizontal(|ui| {
                                ui.label("Max chunk size:");
                                ui.add(egui::DragValue::new(&mut self.config.chunking.max_chunk_size).clamp_range(50..=1000));
                            });
                            ui.horizontal(|ui| {
                                ui.label("Min sentences:");
                                ui.add(egui::DragValue::new(&mut self.config.chunking.min_sentences).clamp_range(1..=10));
                            });
                            ui.separator();
                            ui.label("Paths:");
                            ui.horizontal(|ui| {
                                ui.label("Executable:");
                                ui.text_edit_singleline(&mut self.config.exec_path);
                            });
                            ui.horizontal(|ui| {
                                ui.label("Model Path:");
                                ui.text_edit_singleline(&mut self.config.model_path);
                            });
                            ui.horizontal(|ui| {
                                ui.label("Voice Data:");
                                ui.text_edit_singleline(&mut self.config.voice_data);
                            });
                        });
                        ui.add_space(10.0);
-                
+                        ui.separator();
-                ui.label("✨ Cleaned Text (will be processed):");
+                        ui.add_space(5.0);
                egui::ScrollArea::vertical()
                    .max_height(120.0)
                    .show(ui, |ui| {
                        ui.add(
                            egui::TextEdit::multiline(&mut self.cleaned_text.as_str())
                                .desired_width(f32::INFINITY)
                                .interactive(false)
                        );
                    });
                ui.add_space(10.0);
                // Configuration display
                ui.collapsing("⚙️ TTS Configuration", |ui| {
                    ui.label(format!("Executable: {}", self.config.exec_path));
                    ui.label(format!("Model: {}", self.config.model_path));
                    ui.label(format!("Voice Data: {}", self.config.voice_data));
                    ui.label(format!("Speed: {}", self.config.speed));
                    ui.label(format!("Voice Style: {}", self.config.voice_style));
                    });
                });
        });
@ -281,5 +629,10 @@ impl eframe::App for TtsApp {
        if ctx.input(|i| i.key_pressed(egui::Key::Escape)) {
            ctx.send_viewport_cmd(egui::ViewportCommand::Close);
        }
        // Request repaint to keep checking window load status
        if !self.window_loaded {
            ctx.request_repaint();
        }
    }
 }