Added config, chunking, and current-playback text display

This commit is contained in:
jrosh 2025-06-02 14:46:54 +02:00
commit 22e8481e30
No known key found for this signature in database
GPG key ID: A4D68DCA6C9CCD2D

View file

@ -1,20 +1,39 @@
use eframe::egui;
use std::io::{self, Read};
use std::process::{Command, Stdio};
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
use std::thread;
use std::sync::mpsc;
// TTS Configuration - make this configurable later
struct TtsConfig {
/// Limits that steer how `smart_chunk_text` groups sentences into chunks.
#[derive(Debug, Clone)]
struct ChunkingConfig {
    /// Length a chunk must have reached before it may be closed early.
    min_chunk_size: usize,
    /// Length a chunk should not grow past when another sentence is appended.
    max_chunk_size: usize,
    /// Number of sentences a chunk must hold before it may be closed early.
    min_sentences: usize,
}

impl Default for ChunkingConfig {
    /// Builds the stock limits: 80..200 in size, at least two sentences.
    fn default() -> Self {
        let (smallest, largest) = (80, 200);
        ChunkingConfig {
            min_chunk_size: smallest,
            max_chunk_size: largest,
            min_sentences: 2,
        }
    }
}
/// Settings used when launching the Kokoro TTS executable for each chunk.
#[derive(Debug, Clone)]
struct KokoroConfig {
/// Program that is spawned to synthesize a chunk.
exec_path: String,
/// Value passed to the executable's `--model` argument.
model_path: String,
/// Value passed to the executable's `--data` argument.
voice_data: String,
/// Value passed to the executable's `--speed` argument.
speed: f32,
/// Value passed to the executable's `--style` argument.
voice_style: String,
/// Limits handed to `smart_chunk_text` when the input is split.
chunking: ChunkingConfig,
}
impl Default for TtsConfig {
impl Default for KokoroConfig {
fn default() -> Self {
let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
Self {
@ -23,24 +42,36 @@ impl Default for TtsConfig {
voice_data: format!("{}/bin/kokoros/voices-v1.0.bin", home),
speed: 1.1,
voice_style: "af_heart".to_string(),
chunking: ChunkingConfig::default(),
}
}
}
/// Progress reports exchanged between the background worker threads and the UI.
#[derive(Debug, Clone)]
enum ProcessingStatus {
/// Nothing is running; this is the state the UI starts in.
Idle,
/// The input text is being split into chunks.
Chunking,
/// Audio for the given chunk is being generated (1-based index, total count).
ProcessingChunk(usize, usize), // current, total
/// The given chunk is about to be played (1-based index, total count).
PlayingChunk(usize, usize),
/// The last chunk has finished playing.
Completed,
/// Processing stopped; the payload is a human-readable message.
Error(String),
}
fn main() -> Result<(), eframe::Error> {
let input_text = get_piped_text();
let options = eframe::NativeOptions {
viewport: egui::ViewportBuilder::default()
.with_inner_size([600.0, 400.0])
.with_inner_size([700.0, 400.0])
.with_resizable(true)
.with_decorations(false),
..Default::default()
};
eframe::run_native(
"TTS Processor",
"Kokoro TTS Processor",
options,
Box::new(move |_cc| Box::new(TtsApp::new(input_text))),
Box::new(move |_cc| Box::new(TtsUi::new(input_text))),
)
}
@ -54,177 +85,474 @@ fn get_piped_text() -> String {
}
}
"No piped text provided.\n\nUsage: echo 'text' | ./tts_processor".to_string()
"No piped text provided.\n\nUsage: echo 'text' | ./ttsui\nOr: cat file.txt | ./ttsui".to_string()
}
// Text cleaning function based on your bash script
fn clean_text(text: &str) -> String {
text
// Remove line breaks with hyphens
.replace("-\r", "").replace("-\n", "")
// Replace line breaks with spaces
.replace('\r', " ").replace('\n', " ")
// Compress multiple spaces
.split_whitespace().collect::<Vec<_>>().join(" ")
// Replace double dashes with em dash
.replace("--", "").replace(" - ", "")
// Replace ellipsis
.replace("...", "")
// Remove commas in numbers (basic version)
.replace(",", "")
// Add space after punctuation if missing (basic version)
.chars()
text.lines()
.map(|line| line.trim())
.filter(|line| !line.is_empty())
.collect::<Vec<_>>()
.windows(2)
.map(|w| {
if matches!(w[0], '.' | ',' | ';' | ':') && w[1] != ' ' {
format!("{} {}", w[0], w[1])
} else {
w[0].to_string()
}
})
.collect::<String>()
.trim()
.to_string()
.join(" ")
.replace("-\r", "").replace("-\n", "")
.replace('\r', " ").replace('\n', " ")
.split_whitespace().collect::<Vec<_>>().join(" ")
.replace("--", "").replace(" - ", "")
.replace("...", "")
.replace(",", "")
.trim().to_string()
}
struct TtsApp {
/// Returns `true` for the characters that can end a sentence.
fn is_sentence_terminator(c: char) -> bool {
    matches!(c, '.' | '!' | '?')
}

/// Splits `text` into sentence slices, each keeping its own terminator(s).
///
/// A terminator only closes a sentence when it is followed by whitespace or
/// the end of the text, so tokens such as `1.1` stay in one piece and runs
/// such as `?!` or `...` stay attached to the sentence they end.
fn split_into_sentences(text: &str) -> Vec<&str> {
    let mut sentences = Vec::new();
    let mut start = 0;
    let mut chars = text.char_indices().peekable();

    while let Some((idx, c)) = chars.next() {
        if !is_sentence_terminator(c) {
            continue;
        }
        let at_boundary = chars.peek().map_or(true, |&(_, next)| next.is_whitespace());
        if at_boundary {
            let end = idx + c.len_utf8();
            sentences.push(&text[start..end]);
            start = end;
        }
    }

    // Trailing text without a terminator still counts as a sentence.
    if start < text.len() {
        sentences.push(&text[start..]);
    }
    sentences
}

/// Groups the sentences of `text` into chunks sized according to `config`.
///
/// Sentences are appended to the current chunk until either
/// * adding the next sentence would exceed `config.max_chunk_size`, or
/// * the chunk already holds `config.min_sentences` sentences, is at least
///   `config.min_chunk_size` long, and the combined length would reach
///   1.5 x `config.min_chunk_size`,
/// at which point the chunk is emitted and the sentence starts a new one.
///
/// Lengths are counted in characters, not bytes, so multi-byte text is not
/// chunked prematurely. Each sentence keeps its original terminator (`.`,
/// `!`, `?`); a trailing fragment without one gets a `.` appended. A single
/// sentence longer than `max_chunk_size` is emitted as its own oversized
/// chunk rather than being cut mid-sentence.
///
/// Returns at least one element: when `text` contains no sentence content
/// (empty, whitespace, or punctuation only) the result is `vec![text]`.
fn smart_chunk_text(text: &str, config: &ChunkingConfig) -> Vec<String> {
    let sentences: Vec<String> = split_into_sentences(text)
        .into_iter()
        .map(str::trim)
        // Drop pieces that carry nothing but punctuation/whitespace.
        .filter(|s| s.chars().any(|c| !is_sentence_terminator(c) && !c.is_whitespace()))
        .map(|s| {
            if s.ends_with(is_sentence_terminator) {
                s.to_string()
            } else {
                format!("{}.", s)
            }
        })
        .collect();

    if sentences.is_empty() {
        return vec![text.to_string()];
    }

    // Integer form of `min_chunk_size * 1.5`; avoids a lossy float round-trip.
    let comfortable_len = config
        .min_chunk_size
        .saturating_add(config.min_chunk_size / 2);

    let mut chunks = Vec::new();
    let mut current_chunk = String::new();
    let mut current_len = 0usize; // length of `current_chunk` in characters
    let mut sentence_count = 0usize;

    for sentence in sentences {
        let sentence_len = sentence.chars().count();

        if current_chunk.is_empty() {
            current_chunk = sentence;
            current_len = sentence_len;
            sentence_count = 1;
            continue;
        }

        // Length the chunk would have with a joining space and this sentence.
        let potential_len = current_len + 1 + sentence_len;
        let exceeds_max = potential_len > config.max_chunk_size;
        let is_comfortable = sentence_count >= config.min_sentences
            && current_len >= config.min_chunk_size
            && potential_len >= comfortable_len;

        if exceeds_max || is_comfortable {
            // Emit the finished chunk and let this sentence start the next one.
            chunks.push(std::mem::replace(&mut current_chunk, sentence));
            current_len = sentence_len;
            sentence_count = 1;
        } else {
            current_chunk.push(' ');
            current_chunk.push_str(&sentence);
            current_len = potential_len;
            sentence_count += 1;
        }
    }

    // `sentences` was non-empty, so there is always a final chunk to flush.
    chunks.push(current_chunk);
    chunks
}
struct TtsUi {
original_text: String,
cleaned_text: String,
chunks: Vec<String>,
status: String,
processing_status: ProcessingStatus,
currently_playing_chunk: Option<usize>,
is_processing: bool,
config: TtsConfig,
config: KokoroConfig,
temp_dir: Option<TempDir>,
window_loaded: bool,
should_auto_process: bool,
frames_since_load: u32,
status_receiver: Option<mpsc::Receiver<ProcessingStatus>>,
}
impl TtsApp {
impl TtsUi {
fn new(text: String) -> Self {
let cleaned = clean_text(&text);
let has_text = !cleaned.trim().is_empty() && !cleaned.starts_with("No piped text");
Self {
original_text: text,
cleaned_text: cleaned,
status: "Ready to process".to_string(),
chunks: Vec::new(),
status: if has_text { "Window loading..." } else { "Waiting for text input" }.to_string(),
processing_status: ProcessingStatus::Idle,
currently_playing_chunk: None,
is_processing: false,
config: TtsConfig::default(),
config: KokoroConfig::default(),
temp_dir: None,
window_loaded: false,
should_auto_process: has_text,
frames_since_load: 0,
status_receiver: None,
}
}
fn start_tts_processing(&mut self) {
if self.is_processing {
if self.is_processing || self.cleaned_text.trim().is_empty() {
return;
}
self.is_processing = true;
self.status = "Starting TTS processing...".to_string();
self.processing_status = ProcessingStatus::Chunking;
self.status = "Chunking text...".to_string();
// Create chunks
self.chunks = smart_chunk_text(&self.cleaned_text, &self.config.chunking);
self.status = format!("Created {} chunks", self.chunks.len());
// Create temporary directory
match TempDir::new() {
Ok(temp_dir) => {
self.temp_dir = Some(temp_dir);
self.process_text();
self.process_chunks();
}
Err(e) => {
self.status = format!("Error creating temp directory: {}", e);
self.processing_status = ProcessingStatus::Error(e.to_string());
self.is_processing = false;
}
}
}
fn process_text(&mut self) {
fn process_chunks(&mut self) {
if let Some(ref temp_dir) = self.temp_dir {
let temp_path = temp_dir.path();
let audio_file = temp_path.join("output.wav");
let temp_path = temp_dir.path().to_path_buf();
let chunks = self.chunks.clone();
let config = self.config.clone();
self.status = "Generating audio...".to_string();
// Create a channel for status updates
let (sender, receiver) = mpsc::channel();
self.status_receiver = Some(receiver);
// Run TTS command
let result = Command::new(&self.config.exec_path)
.arg("--model").arg(&self.config.model_path)
.arg("--data").arg(&self.config.voice_data)
.arg("--speed").arg(self.config.speed.to_string())
.arg("--style").arg(&self.config.voice_style)
.arg("text").arg(&self.cleaned_text)
// Process chunks in background thread
thread::spawn(move || {
Self::process_chunks_background(temp_path, chunks, config, sender);
});
self.status = "Starting chunk processing...".to_string();
}
}
fn process_chunks_background(
temp_path: PathBuf,
chunks: Vec<String>,
config: KokoroConfig,
sender: mpsc::Sender<ProcessingStatus>
) {
let total_chunks = chunks.len();
// Channel to signal when new audio is ready
let (audio_sender, audio_receiver) = mpsc::channel::<PathBuf>();
// Spawn audio player thread
let player_sender = sender.clone();
thread::spawn(move || {
let mut chunk_num = 1;
// Play audio files as they become available
while let Ok(audio_file) = audio_receiver.recv() {
let _ = player_sender.send(ProcessingStatus::PlayingChunk(chunk_num, total_chunks));
Self::play_audio_sync(&audio_file);
chunk_num += 1;
// If this was the last chunk, we're done
if chunk_num > total_chunks {
let _ = player_sender.send(ProcessingStatus::Completed);
break;
}
}
});
// Process each chunk and send to player as soon as ready
for (i, chunk) in chunks.iter().enumerate() {
let chunk_num = i + 1;
// Send status update
let _ = sender.send(ProcessingStatus::ProcessingChunk(chunk_num, total_chunks));
let audio_file = temp_path.join(format!("chunk_{}.wav", i));
println!("Processing chunk {}/{}: {}...", chunk_num, total_chunks,
&chunk.chars().take(40).collect::<String>());
let result = Command::new(&config.exec_path)
.arg("--model").arg(&config.model_path)
.arg("--data").arg(&config.voice_data)
.arg("--speed").arg(config.speed.to_string())
.arg("--style").arg(&config.voice_style)
.arg("text").arg(chunk)
.arg("--output").arg(&audio_file)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output();
match result {
Ok(output) => {
if output.status.success() && audio_file.exists() {
self.status = "Audio generated, starting playback...".to_string();
self.play_audio(&audio_file);
// Send this audio file to the player immediately
if audio_sender.send(audio_file).is_err() {
eprintln!("Failed to send audio file to player");
let _ = sender.send(ProcessingStatus::Error(
"Audio player thread disconnected".to_string()
));
return;
}
} else {
let error_msg = String::from_utf8_lossy(&output.stderr);
self.status = format!("TTS failed: {}", error_msg);
self.is_processing = false;
eprintln!("Kokoro failed for chunk {}: {}", chunk_num,
String::from_utf8_lossy(&output.stderr));
let _ = sender.send(ProcessingStatus::Error(
format!("Failed to process chunk {}", chunk_num)
));
return;
}
}
Err(e) => {
self.status = format!("Failed to run TTS: {}", e);
self.is_processing = false;
}
eprintln!("Failed to run Kokoro for chunk {}: {}", chunk_num, e);
let _ = sender.send(ProcessingStatus::Error(
format!("Failed to run Kokoro for chunk {}: {}", chunk_num, e)
));
return;
}
}
}
fn play_audio(&mut self, audio_file: &PathBuf) {
// Try different audio players
let players = ["aplay", "paplay", "play", "ffplay"];
drop(audio_sender);
println!("✅ All chunks processed!");
}
fn play_audio_sync(audio_file: &PathBuf) {
let players = ["aplay", "paplay", "play", "ffplay", "mpg123"];
for player in &players {
let result = Command::new(player)
.arg(audio_file)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.stderr(Stdio::null())
.spawn();
match result {
Ok(mut child) => {
self.status = format!("Playing audio with {}...", player);
// Wait for playback to complete
println!("Playing audio with {}...", player);
match child.wait() {
Ok(exit_status) => {
if exit_status.success() {
self.status = "Playback completed!".to_string();
} else {
self.status = format!("Playback failed with {}", player);
println!("✅ Chunk played successfully!");
return;
}
}
Err(e) => {
self.status = format!("Error waiting for playback: {}", e);
}
}
self.is_processing = false;
return;
}
Err(_) => {
// Try next player
continue;
eprintln!("Error during playback: {}", e);
}
}
}
self.status = "No audio player found (tried: aplay, paplay, play, ffplay)".to_string();
self.is_processing = false;
Err(_) => continue,
}
}
impl eframe::App for TtsApp {
eprintln!("❌ No audio player found");
}
}
impl eframe::App for TtsUi {
fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) {
// Check for status updates from background processing
if let Some(ref receiver) = self.status_receiver {
if let Ok(new_status) = receiver.try_recv() {
match &new_status {
ProcessingStatus::ProcessingChunk(current, total) => {
self.status = format!("Processing chunk {}/{}", current, total);
}
ProcessingStatus::PlayingChunk(current, total) => {
self.status = format!("Playing chunk {}/{}", current, total);
self.currently_playing_chunk = Some(*current);
}
ProcessingStatus::Completed => {
self.status = "✅ All chunks completed successfully!".to_string();
self.currently_playing_chunk = None;
self.is_processing = false;
}
ProcessingStatus::Error(err) => {
self.status = format!("❌ Error: {}", err);
self.currently_playing_chunk = None;
self.is_processing = false;
}
_ => {}
}
self.processing_status = new_status;
}
}
if !self.window_loaded {
self.frames_since_load += 1;
if self.frames_since_load > 5 {
self.window_loaded = true;
self.status = "Window loaded!".to_string();
// Start auto-processing if provided text
if self.should_auto_process {
self.start_tts_processing();
}
}
}
egui::CentralPanel::default().show(ctx, |ui| {
egui::ScrollArea::vertical()
.auto_shrink([false; 2])
.show(ui, |ui| {
ui.vertical(|ui| {
// Title and status
ui.heading("🔊 TTS Processor");
ui.label(format!("Status: {}", self.status));
ui.horizontal(|ui| {
ui.label("Status:");
if self.is_processing {
ui.spinner();
}
ui.label(&self.status);
});
ui.separator();
ui.add_space(10.0);
// Control buttons
// Current audio display
ui.horizontal(|ui| {
if ui.button("🎵 Start TTS Processing").clicked() && !self.is_processing {
ui.label("🎵 Currently Playing:");
if let Some(playing_chunk) = self.currently_playing_chunk {
ui.label(format!("Chunk {}/{}", playing_chunk, self.chunks.len()));
} else {
match &self.processing_status {
ProcessingStatus::Completed => {
ui.label("Playback completed");
}
ProcessingStatus::Error(_) => {
ui.label("Error occurred");
}
ProcessingStatus::ProcessingChunk(_, _) => {
ui.label("Preparing audio...");
}
_ => {
ui.label("Ready to start");
}
}
}
});
// Show the chunk that is currently being played
if let Some(playing_chunk) = self.currently_playing_chunk {
if let Some(playing_text) = self.chunks.get(playing_chunk - 1) {
egui::ScrollArea::vertical()
.max_height(120.0)
.show(ui, |ui| {
ui.add(
egui::TextEdit::multiline(&mut playing_text.as_str())
.desired_width(f32::INFINITY)
.desired_rows(1)
.interactive(false)
);
});
}
} else {
match &self.processing_status {
ProcessingStatus::Completed => {
ui.label("🎉 All audio chunks have been played successfully!");
}
ProcessingStatus::Error(err) => {
ui.colored_label(egui::Color32::RED, format!("❌ Error: {}", err));
}
ProcessingStatus::ProcessingChunk(current, total) => {
ui.label(format!("⏳ Processing chunk {}/{}... Audio will start soon.", current, total));
}
_ => {
// When not playing anything, show the input text for editing
if !self.is_processing && self.chunks.is_empty() {
egui::ScrollArea::vertical()
.max_height(120.0)
.show(ui, |ui| {
ui.add(
egui::TextEdit::multiline(&mut self.cleaned_text)
.desired_width(f32::INFINITY)
.desired_rows(5)
.interactive(!self.is_processing)
);
});
} else {
ui.label("⏳ Waiting for audio playback to begin...");
}
}
}
}
ui.add_space(10.0);
ui.separator();
ui.add_space(5.0);
// Collapsible text input section (moved above current audio)
ui.collapsing("📝 Text Input", |ui| {
ui.horizontal(|ui| {
ui.label("Text to process:");
ui.label(format!("({} characters)", self.cleaned_text.len()));
if !self.chunks.is_empty() {
ui.label(format!("- {} chunks", self.chunks.len()));
}
});
egui::ScrollArea::vertical()
.max_height(150.0)
.show(ui, |ui| {
ui.add(
egui::TextEdit::multiline(&mut self.cleaned_text)
.desired_width(f32::INFINITY)
.desired_rows(8)
.interactive(!self.is_processing)
);
});
// Show all chunks if available
if !self.chunks.is_empty() {
ui.add_space(5.0);
ui.label("📄 All Chunks:");
egui::ScrollArea::vertical()
.max_height(100.0)
.show(ui, |ui| {
for (i, chunk) in self.chunks.iter().enumerate() {
let chunk_preview = if chunk.len() > 80 {
format!("{}...", &chunk.chars().take(80).collect::<String>())
} else {
chunk.clone()
};
// Highlight current chunk being processed or played
let is_current = match &self.processing_status {
ProcessingStatus::ProcessingChunk(current, _) => *current == i + 1,
ProcessingStatus::PlayingChunk(current, _) => *current == i + 1,
_ => false,
};
if is_current {
ui.colored_label(egui::Color32::GREEN, format!("{}: {}", i + 1, chunk_preview));
} else {
ui.label(format!("{}: {}", i + 1, chunk_preview));
}
}
});
}
});
ui.add_space(10.0);
// Controls
ui.horizontal(|ui| {
if ui.button("🎵 Process with Kokoro").clicked() && !self.is_processing {
self.start_tts_processing();
}
@ -237,42 +565,62 @@ impl eframe::App for TtsApp {
ui.add_space(10.0);
ui.separator();
ui.add_space(10.0);
ui.add_space(5.0);
// Text display
ui.label("📝 Original Text:");
egui::ScrollArea::vertical()
.max_height(100.0)
.show(ui, |ui| {
ui.add(
egui::TextEdit::multiline(&mut self.original_text.as_str())
.desired_width(f32::INFINITY)
.interactive(false)
);
// Configuration panel
ui.collapsing("⚙ Configuration", |ui| {
ui.horizontal(|ui| {
ui.label("Speed:");
ui.add(egui::Slider::new(&mut self.config.speed, 0.5..=2.0).step_by(0.1));
});
ui.horizontal(|ui| {
ui.label("Voice Style:");
ui.text_edit_singleline(&mut self.config.voice_style);
});
ui.separator();
ui.label("Chunking Settings:");
ui.horizontal(|ui| {
ui.label("Min chunk size:");
ui.add(egui::DragValue::new(&mut self.config.chunking.min_chunk_size).clamp_range(20..=500));
});
ui.horizontal(|ui| {
ui.label("Max chunk size:");
ui.add(egui::DragValue::new(&mut self.config.chunking.max_chunk_size).clamp_range(50..=1000));
});
ui.horizontal(|ui| {
ui.label("Min sentences:");
ui.add(egui::DragValue::new(&mut self.config.chunking.min_sentences).clamp_range(1..=10));
});
ui.separator();
ui.label("Paths:");
ui.horizontal(|ui| {
ui.label("Executable:");
ui.text_edit_singleline(&mut self.config.exec_path);
});
ui.horizontal(|ui| {
ui.label("Model Path:");
ui.text_edit_singleline(&mut self.config.model_path);
});
ui.horizontal(|ui| {
ui.label("Voice Data:");
ui.text_edit_singleline(&mut self.config.voice_data);
});
});
ui.add_space(10.0);
ui.label("✨ Cleaned Text (will be processed):");
egui::ScrollArea::vertical()
.max_height(120.0)
.show(ui, |ui| {
ui.add(
egui::TextEdit::multiline(&mut self.cleaned_text.as_str())
.desired_width(f32::INFINITY)
.interactive(false)
);
});
ui.add_space(10.0);
// Configuration display
ui.collapsing("⚙️ TTS Configuration", |ui| {
ui.label(format!("Executable: {}", self.config.exec_path));
ui.label(format!("Model: {}", self.config.model_path));
ui.label(format!("Voice Data: {}", self.config.voice_data));
ui.label(format!("Speed: {}", self.config.speed));
ui.label(format!("Voice Style: {}", self.config.voice_style));
ui.separator();
ui.add_space(5.0);
});
});
});
@ -281,5 +629,10 @@ impl eframe::App for TtsApp {
if ctx.input(|i| i.key_pressed(egui::Key::Escape)) {
ctx.send_viewport_cmd(egui::ViewportCommand::Close);
}
// Request repaint to keep checking window load status
if !self.window_loaded {
ctx.request_repaint();
}
}
}