no config, kokoros only, no chunks

This commit is contained in:
jrosh 2025-06-02 13:02:26 +02:00
commit 4a70c57c8b
No known key found for this signature in database
GPG key ID: A4D68DCA6C9CCD2D
4 changed files with 4399 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
target/

4103
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

10
Cargo.toml Normal file
View file

@ -0,0 +1,10 @@
[package]
name = "ttsui"
version = "0.1.0"
edition = "2021"
[dependencies]
eframe = "0.27"
egui = "0.27"
atty = "0.2"
tempfile = "3.8"

285
src/main.rs Normal file
View file

@ -0,0 +1,285 @@
use eframe::egui;
use std::io::{self, Read};
use std::process::{Command, Stdio};
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Settings used to invoke the external Kokoros TTS executable.
///
/// The values are currently fixed by the `Default` implementation; the type
/// derives `Debug`, `Clone` and `PartialEq` so it can be logged, copied and
/// compared once it becomes user-configurable.
#[derive(Debug, Clone, PartialEq)]
struct TtsConfig {
    /// Path of the TTS executable to run.
    exec_path: String,
    /// Path passed to the executable's `--model` option.
    model_path: String,
    /// Path passed to the executable's `--data` option.
    voice_data: String,
    /// Value passed to the executable's `--speed` option.
    speed: f32,
    /// Value passed to the executable's `--style` option.
    voice_style: String,
}
impl Default for TtsConfig {
fn default() -> Self {
let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
Self {
exec_path: format!("{}/bin/kokoros/target/release/koko", home),
model_path: format!("{}/bin/kokoros/checkpoints/kokoro-v1.0.onnx", home),
voice_data: format!("{}/bin/kokoros/voices-v1.0.bin", home),
speed: 1.1,
voice_style: "af_heart".to_string(),
}
}
}
/// Entry point: reads the piped text, then opens an undecorated 600x400
/// window that hosts the `TtsApp`.
fn main() -> Result<(), eframe::Error> {
    // Read stdin before the window is created.
    let piped_text = get_piped_text();

    let viewport = egui::ViewportBuilder::default()
        .with_inner_size([600.0, 400.0])
        .with_decorations(false);
    let native_options = eframe::NativeOptions {
        viewport,
        ..Default::default()
    };

    let app_creator = Box::new(move |_cc| Box::new(TtsApp::new(piped_text)));
    eframe::run_native("TTS Processor", native_options, app_creator)
}
/// Returns the text piped into the program on stdin, unmodified.
///
/// When stdin is a terminal, cannot be read as UTF-8, or contains only
/// whitespace, a short usage message is returned instead.
///
/// The text is deliberately returned raw: `TtsApp::new` performs the cleaning,
/// so cleaning here as well would process the text twice and the
/// "Original Text" pane would never show the original input.
fn get_piped_text() -> String {
    let stdin = io::stdin();
    // The standard library's terminal check replaces the `atty` crate.
    if !std::io::IsTerminal::is_terminal(&stdin) {
        let mut buffer = String::new();
        if stdin.lock().read_to_string(&mut buffer).is_ok() && !buffer.trim().is_empty() {
            return buffer;
        }
    }
    // The binary is named after the Cargo package (`ttsui`).
    "No piped text provided.\n\nUsage: echo 'text' | ./ttsui".to_string()
}
/// Normalizes raw text before it is handed to the TTS engine.
///
/// Steps, in order:
/// 1. Re-join words hyphenated across a line break (`-\r\n`, `-\r`, `-\n`).
/// 2. Collapse every run of whitespace (including line breaks) into a single
///    space and trim both ends.
/// 3. Replace `--` and ` - ` with an em dash and `...` with an ellipsis
///    character, so neighbouring words are not fused together.
/// 4. Remove a comma that sits directly between two ASCII digits
///    (`1,000` -> `1000`). This is a basic rule: `1,2` is also joined.
/// 5. Insert a space after `.`, `,`, `;` or `:` when a letter or digit follows
///    immediately, except inside numbers such as `3.14` or `12:30`.
///
/// Returns the cleaned text; empty or whitespace-only input yields `""`.
fn clean_text(text: &str) -> String {
    // CRLF must be handled first, otherwise "-\r" matches and leaves a "\n"
    // behind that later turns into a space in the middle of the word.
    let unwrapped = text
        .replace("-\r\n", "")
        .replace("-\r", "")
        .replace("-\n", "");

    // Whitespace is collapsed before the dash rule so " - " matches reliably.
    let normalized = unwrapped
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .replace("--", "\u{2014}")
        .replace(" - ", "\u{2014}")
        .replace("...", "\u{2026}");

    let chars: Vec<char> = normalized.chars().collect();
    let is_digit_at = |index: Option<usize>| {
        index
            .and_then(|i| chars.get(i))
            .map_or(false, |c| c.is_ascii_digit())
    };

    // Single pass with look-behind/look-ahead: every input character is
    // emitted at most once, so nothing is dropped or duplicated.
    let mut cleaned = String::with_capacity(normalized.len() + 8);
    for (i, &ch) in chars.iter().enumerate() {
        let inside_number = is_digit_at(i.checked_sub(1)) && is_digit_at(Some(i + 1));
        if ch == ',' && inside_number {
            // Thousands separator: drop it so the number is read as one value.
            continue;
        }
        cleaned.push(ch);

        let word_follows = chars
            .get(i + 1)
            .map_or(false, |next| next.is_alphanumeric());
        if matches!(ch, '.' | ',' | ';' | ':') && !inside_number && word_follows {
            cleaned.push(' ');
        }
    }
    cleaned
}
/// State of the TTS window: the texts being shown, the current status line
/// and the settings used to run the external TTS command.
struct TtsApp {
/// Text passed to `TtsApp::new`; shown in the "Original Text" pane.
original_text: String,
/// Result of `clean_text` on the input; this is what gets synthesized.
cleaned_text: String,
/// Human-readable status line shown at the top of the window.
status: String,
/// Set while a TTS run is in progress; guards against starting a second one.
is_processing: bool,
/// Paths and parameters for the external TTS executable.
config: TtsConfig,
/// Temporary directory holding the generated audio file; `None` until the
/// first TTS run is started.
temp_dir: Option<TempDir>,
}
impl TtsApp {
fn new(text: String) -> Self {
let cleaned = clean_text(&text);
Self {
original_text: text,
cleaned_text: cleaned,
status: "Ready to process".to_string(),
is_processing: false,
config: TtsConfig::default(),
temp_dir: None,
}
}
fn start_tts_processing(&mut self) {
if self.is_processing {
return;
}
self.is_processing = true;
self.status = "Starting TTS processing...".to_string();
// Create temporary directory
match TempDir::new() {
Ok(temp_dir) => {
self.temp_dir = Some(temp_dir);
self.process_text();
}
Err(e) => {
self.status = format!("Error creating temp directory: {}", e);
self.is_processing = false;
}
}
}
fn process_text(&mut self) {
if let Some(ref temp_dir) = self.temp_dir {
let temp_path = temp_dir.path();
let audio_file = temp_path.join("output.wav");
self.status = "Generating audio...".to_string();
// Run TTS command
let result = Command::new(&self.config.exec_path)
.arg("--model").arg(&self.config.model_path)
.arg("--data").arg(&self.config.voice_data)
.arg("--speed").arg(self.config.speed.to_string())
.arg("--style").arg(&self.config.voice_style)
.arg("text").arg(&self.cleaned_text)
.arg("--output").arg(&audio_file)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.output();
match result {
Ok(output) => {
if output.status.success() && audio_file.exists() {
self.status = "Audio generated, starting playback...".to_string();
self.play_audio(&audio_file);
} else {
let error_msg = String::from_utf8_lossy(&output.stderr);
self.status = format!("TTS failed: {}", error_msg);
self.is_processing = false;
}
}
Err(e) => {
self.status = format!("Failed to run TTS: {}", e);
self.is_processing = false;
}
}
}
}
fn play_audio(&mut self, audio_file: &PathBuf) {
// Try different audio players
let players = ["aplay", "paplay", "play", "ffplay"];
for player in &players {
let result = Command::new(player)
.arg(audio_file)
.stdout(Stdio::null())
.stderr(Stdio::piped())
.spawn();
match result {
Ok(mut child) => {
self.status = format!("Playing audio with {}...", player);
// Wait for playback to complete
match child.wait() {
Ok(exit_status) => {
if exit_status.success() {
self.status = "Playback completed!".to_string();
} else {
self.status = format!("Playback failed with {}", player);
}
}
Err(e) => {
self.status = format!("Error waiting for playback: {}", e);
}
}
self.is_processing = false;
return;
}
Err(_) => {
// Try next player
continue;
}
}
}
self.status = "No audio player found (tried: aplay, paplay, play, ffplay)".to_string();
self.is_processing = false;
}
}
impl eframe::App for TtsApp {
    /// Draws the window: status line, start/exit buttons, read-only views of
    /// the original and cleaned text, and a collapsible configuration summary.
    /// Pressing Escape closes the window.
    fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) {
        egui::CentralPanel::default().show(ctx, |ui| {
            ui.vertical(|ui| {
                // Title and status
                ui.heading("🔊 TTS Processor");
                ui.label(format!("Status: {}", self.status));
                ui.separator();
                ui.add_space(10.0);

                // Control buttons
                ui.horizontal(|ui| {
                    if ui.button("🎵 Start TTS Processing").clicked() && !self.is_processing {
                        self.start_tts_processing();
                    }
                    ui.separator();
                    if ui.button("❌ Exit").clicked() {
                        ctx.send_viewport_cmd(egui::ViewportCommand::Close);
                    }
                });
                ui.add_space(10.0);
                ui.separator();
                ui.add_space(10.0);

                // Text display. Each scroll area gets its own id source: two
                // scroll areas in the same parent `Ui` would otherwise share
                // one id and therefore one scroll state.
                ui.label("📝 Original Text:");
                egui::ScrollArea::vertical()
                    .id_source("original_text_scroll")
                    .max_height(100.0)
                    .show(ui, |ui| {
                        // `&mut &str` makes the text edit read-only.
                        ui.add(
                            egui::TextEdit::multiline(&mut self.original_text.as_str())
                                .desired_width(f32::INFINITY)
                                .interactive(false),
                        );
                    });
                ui.add_space(10.0);

                ui.label("✨ Cleaned Text (will be processed):");
                egui::ScrollArea::vertical()
                    .id_source("cleaned_text_scroll")
                    .max_height(120.0)
                    .show(ui, |ui| {
                        ui.add(
                            egui::TextEdit::multiline(&mut self.cleaned_text.as_str())
                                .desired_width(f32::INFINITY)
                                .interactive(false),
                        );
                    });
                ui.add_space(10.0);

                // Configuration display
                ui.collapsing("⚙️ TTS Configuration", |ui| {
                    ui.label(format!("Executable: {}", self.config.exec_path));
                    ui.label(format!("Model: {}", self.config.model_path));
                    ui.label(format!("Voice Data: {}", self.config.voice_data));
                    ui.label(format!("Speed: {}", self.config.speed));
                    ui.label(format!("Voice Style: {}", self.config.voice_style));
                });
            });
        });

        // Close on Escape
        if ctx.input(|i| i.key_pressed(egui::Key::Escape)) {
            ctx.send_viewport_cmd(egui::ViewportCommand::Close);
        }
    }
}