This commit is contained in:
Martin Jaros 2024-04-25 20:34:28 +02:00
commit 2042af8d20
16 changed files with 1524 additions and 0 deletions

4
lsb_package/__init__.py Normal file
View file

@ -0,0 +1,4 @@
from .start_page import StartPage
from .transcription_page import TranscriptionPage
from .keywords_page import KeywordsPage
from .llm_page import LlmPage

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,145 @@
import tkinter as tk
from tkinter import ttk
from nltk.stem import WordNetLemmatizer
from datetime import datetime
import re
#from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from nltk.tokenize import word_tokenize
from .llm_page import LlmPage
class KeywordsPage(tk.Frame):
    """Page that shows the extracted keywords and lets the user edit them.

    The page writes the transcription to
    ``recordings/transcript_<lecture_filename>.txt``, fills an editable text
    area with TF-IDF keywords and, when "Generate Notes" is pressed, prepends
    the (possibly edited) keywords to that file before switching to
    ``LlmPage``.
    """

    # Upper bound on the number of terms kept by the TF-IDF vectorizer.
    MAX_KEYWORDS = 20

    def __init__(self, parent, app_data):
        """Build the page widgets.

        Args:
            parent: Owning application object; it must provide ``show_frame``.
            app_data: Shared state object providing the fonts,
                ``lecture_filename`` and ``modified_keywords``.
        """
        super().__init__(parent)
        self.app = parent
        self.app_data = app_data

        self.grid_columnconfigure(0, weight=1)
        # NOTE(review): the weight goes to the button row (4), not to the
        # text area row (3) - confirm this is the intended resize behaviour.
        self.grid_rowconfigure(4, weight=1)

        self.keywords_description = 'Here are the extracted keywords. You can modify them to your liking before feeding them into note generation. Keywords will be added to the top of the transcription file.'

        tk.Label(self, text="Keywords", font=self.app_data.heading_font).grid(row=0, column=0, columnspan=2)

        self.lecture_filename_label = tk.Label(self, font=self.app_data.mono_font)
        self.lecture_filename_label.grid(row=1, column=0, pady=4)

        tk.Label(
            self,
            text=self.keywords_description,
            font=self.app_data.paragraph_font,
            wraplength=400,
            justify="left",
        ).grid(row=2, column=0, columnspan=2, pady=5, padx=5)

        self.keywords_textarea = tk.Text(self, wrap="word", font=self.app_data.paragraph_font)
        self.keywords_textarea.grid(row=3, column=0, sticky="nsew", pady=6, padx=6)

        keywords_scrollbar = tk.Scrollbar(self, command=self.keywords_textarea.yview)
        keywords_scrollbar.grid(row=3, column=1, sticky="ns")
        self.keywords_textarea.config(yscrollcommand=keywords_scrollbar.set)

        tk.Button(
            self,
            text="Generate Notes",
            command=self.write_kw_and_forward_to_llm_page,
        ).grid(row=4, column=0, columnspan=2, pady=5)

    def _transcript_path(self):
        """Return the transcript file path for the current lecture name."""
        return f"recordings/transcript_{self.app_data.lecture_filename}.txt"

    def write_kw_and_forward_to_llm_page(self):
        """Prepend the edited keywords to the transcript file and open LlmPage.

        The text area content is stored on both ``self.modified_keywords`` and
        ``self.app_data.modified_keywords`` so the next page can read it.
        """
        self.modified_keywords = self.keywords_textarea.get('1.0', tk.END)
        self.app_data.modified_keywords = self.modified_keywords

        keywords = f"Transcription keywords:\n\n{self.modified_keywords}\n"
        filename = self._transcript_path()

        # Explicit UTF-8 so transcripts with non-ASCII characters round-trip
        # regardless of the platform's default encoding.
        with open(filename, 'r', encoding="utf-8") as file:
            transcription = file.read()
        with open(filename, 'w', encoding="utf-8") as file:
            file.write(keywords)
            file.write(transcription)

        self.app.show_frame(LlmPage)

    def start_kw_extraction_process(self, transcription_text):
        """Save the transcription to disk and show its keywords.

        Args:
            transcription_text: Transcript text to save and analyse.
        """
        # No recording was made (e.g. it was skipped): derive a name from the
        # current time so the transcript still gets its own file.
        if self.app_data.lecture_filename is None:
            date_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.app_data.lecture_filename = f"lecture_{date_time_str}"

        filename = self._transcript_path()
        self.lecture_filename_label.config(text=filename)

        with open(filename, "w", encoding="utf-8") as file:
            file.write(transcription_text)

        keywords = self.extract_topics(transcription_text)
        self.keywords_textarea.delete('1.0', tk.END)
        self.keywords_textarea.insert(tk.END, "\n".join(keywords))

    def extract_topics(self, transcript):
        """Return the sorted TF-IDF vocabulary of *transcript*.

        The transcript is split into sentences on ``.``, ``!`` and ``?``; each
        sentence is lower-cased, stripped of punctuation, tokenized and
        lemmatized as verbs. A TF-IDF vectorizer is then fitted on the
        sentences and its feature names are returned.

        Args:
            transcript: Text to analyse.

        Returns:
            Sorted list of keywords; empty when the vectorizer cannot build a
            vocabulary (for example an empty or very short transcript).
        """
        lemmatizer = WordNetLemmatizer()

        cleaned_data = []
        for sentence in re.split(r'[.!?]', transcript):
            sentence = re.sub(r'[^\w\s]', '', sentence.lower())
            words = word_tokenize(sentence)
            cleaned_data.append(
                ' '.join(lemmatizer.lemmatize(word, pos='v') for word in words)
            )

        tfidf_vectorizer = TfidfVectorizer(
            max_df=0.30,
            min_df=2,
            max_features=self.MAX_KEYWORDS,
            stop_words="english",
        )
        try:
            # Only the vocabulary is needed; the TF-IDF matrix itself was
            # only consumed by a topic model that is no longer used.
            tfidf_vectorizer.fit(cleaned_data)
        except ValueError:
            # Raised when no term survives the min_df/max_df pruning or the
            # vocabulary is empty; report "no keywords" instead of failing.
            return []

        return sorted(tfidf_vectorizer.get_feature_names_out())

93
lsb_package/llm_page.py Normal file
View file

@ -0,0 +1,93 @@
from llama_cpp import Llama
import tkinter as tk
from tkinter import ttk
import threading
class LlmPage(tk.Frame):
    """Page that generates notes for the keywords with a local Llama model.

    Generation runs on a background thread; the result (or an error message)
    is written to the text widget through ``after`` callbacks.
    """

    # Location of the GGUF model file and the context window passed to Llama.
    MODEL_PATH = "/home/jrosh/.local/share/nomic.ai/GPT4All/gpt4all-falcon-newbpe-q4_0.gguf"
    CONTEXT_SIZE = 2048

    def __init__(self, parent, app_data):
        """Build the page widgets.

        Args:
            parent: Owning application object.
            app_data: Shared state object providing the fonts and
                ``modified_keywords``.
        """
        super().__init__(parent)
        self.app = parent
        self.app_data = app_data

        tk.Label(self, text="Notes", font=self.app_data.heading_font).grid(
            row=0, column=0, sticky="ew", pady=2, padx=2
        )
        tk.Label(
            self,
            text="Press the generate button and wait for your notes to generate.",
            font=self.app_data.paragraph_font,
            wraplength=400,
            justify="left",
        ).grid(row=1, column=0, sticky="ew", pady=2, padx=2)

        self.text_widget = tk.Text(self, font=self.app_data.paragraph_font, wrap="word")
        self.text_widget.grid(row=2, column=0, sticky="nsew", pady=6, padx=6)

        self.start_button = ttk.Button(self, text="Start Operation", command=self.start_llama_operation)
        self.start_button.grid(row=3, column=0, sticky="ew", pady=2, padx=2)

    def start_llama_operation(self):
        """Start note generation on a background thread.

        When no keywords have been stored yet, only a hint is shown.
        """
        # Clear first so repeated clicks do not pile the same message up.
        self.text_widget.delete('1.0', tk.END)

        if self.app_data.modified_keywords is None:
            self.text_widget.insert(tk.END, "Keywords have not been set.")
            return

        self.text_widget.insert(tk.END, "Please wait...")
        # Block further clicks while a generation is running so the model is
        # not loaded several times in parallel; re-enabled in
        # update_text_widget.
        self.start_button.config(state="disabled")

        operation_thread = threading.Thread(
            target=self.run_llama_operation,
            args=(self.app_data.modified_keywords,),  # Pass data explicitly
            daemon=True,  # do not keep the process alive after the window closes
        )
        operation_thread.start()

    def run_llama_operation(self, llmTopics):
        """Load the model, run the prompt and publish the generated text.

        Intended to run on a worker thread. Any failure is printed and
        replaced by a generic message in the text widget.

        Args:
            llmTopics: Keyword text to describe.
        """
        try:
            print("##### Started Llama...")
            print(llmTopics)

            llm = Llama(model_path=self.MODEL_PATH, n_ctx=self.CONTEXT_SIZE)
            output = llm(
                "Generate comprehensive, informative and factual descriptions "
                f"for the provided keywords '{llmTopics.strip()}'",  # Prompt
                # max_tokens <= 0 means "no explicit limit": generation is
                # bounded by the context window (n_ctx) only.
                max_tokens=0,
            )
            print(output)

            # A completion response keeps the generated string under
            # choices[0]['text']; pass the text, not the whole choice dict.
            generated_text = output['choices'][0]['text']
            self.text_widget.after(0, self.update_text_widget, generated_text)
            print("##### Llama Finished")
        except Exception as e:
            # Boundary of the worker thread: report and keep the UI usable.
            print(f"Error during Llama operation: {e}")
            self.text_widget.after(0, self.update_text_widget, "An error occurred, please try again.")

    def update_text_widget(self, content):
        """Replace the text widget content and re-enable the start button.

        Does nothing when the page no longer exists.

        Args:
            content: Text to display.
        """
        if self.winfo_exists():
            self.text_widget.delete('1.0', tk.END)
            self.text_widget.insert(tk.END, content)
            self.start_button.config(state="normal")

86
lsb_package/start_page.py Normal file
View file

@ -0,0 +1,86 @@
import tkinter as tk
from tkinter import ttk
from datetime import datetime
import sounddevice as sd
import soundfile as sf
import threading
import numpy as np
from .transcription_page import TranscriptionPage
class StartPage(tk.Frame):
    """First page: records the lecture from a selectable input device.

    Audio is captured on a background thread, written to
    ``recordings/recording_<lecture_filename>.wav`` and then handed to
    ``TranscriptionPage``.
    """

    # Sample rate (Hz) used both for capturing and for writing the WAV file.
    SAMPLE_RATE = 16000

    def __init__(self, parent, app_data):
        """Build the page widgets and list the available input devices.

        Args:
            parent: Owning application object; it must provide ``frames`` and
                ``show_frame``.
            app_data: Shared state object providing the fonts and
                ``lecture_filename``.
        """
        super().__init__(parent)
        self.app = parent
        self.app_data = app_data
        self.recording = False
        self.recording_data = []

        tk.Label(self, text="Lecture Summary Bot", font=self.app_data.heading_font).pack(pady=5)

        self.start_page_description = tk.Label(
            self,
            text="Start by recording your lecture using selected input device.",
            font=self.app_data.paragraph_font,
            wraplength=400,
            justify="left",
        )
        self.start_page_description.pack(pady=5)

        # Map the name of every device with input channels to its index.
        self.input_devices = {
            device['name']: index
            for index, device in enumerate(sd.query_devices())
            if device['max_input_channels'] > 0
        }

        # Dropdown for device selection
        self.device_var = tk.StringVar()
        device_names = list(self.input_devices)
        self.device_menu = ttk.Combobox(self, values=device_names, textvariable=self.device_var)
        if device_names:
            # Prefer a device literally called "default"; otherwise fall back
            # to the first input device so the selection is always valid.
            self.device_var.set("default" if "default" in device_names else device_names[0])
        self.device_menu.pack(pady=10)

        ttk.Button(self, text="Start Recording", command=self.start_recording).pack(pady=5)
        ttk.Button(self, text="Stop Recording", command=self.stop_recording).pack(pady=5)
        ttk.Button(self, text="Skip Recording", command=self.skip_recording_page).pack(pady=5)

        # Recording indicator
        self.recording_indicator = tk.Label(self, text="Recording: OFF", fg="red")
        self.recording_indicator.pack(pady=5)

    def start_recording(self):
        """Start capturing audio on a background thread (no-op if running)."""
        if self.recording:
            return

        selected = self.device_var.get()
        # Listed names resolve to their index. Free text typed into the
        # combobox is passed on as a device query string, and an empty
        # selection selects the system default device (None).
        device = self.input_devices.get(selected, selected or None)

        self.recording = True
        self.update_recording_indicator(True)
        self.recording_data = []
        threading.Thread(target=self.record_audio, args=(device,)).start()

    def stop_recording(self):
        """Ask the recording thread to stop and update the indicator."""
        if self.recording:
            self.recording = False
            self.update_recording_indicator(False)

    def record_audio(self, device_index):
        """Capture audio until ``self.recording`` becomes false, then save it.

        Runs on the recording thread.

        Args:
            device_index: Device passed to ``sd.InputStream`` (an index, a
                query string, or ``None`` for the default device).
        """
        try:
            with sd.InputStream(
                device=device_index,
                samplerate=self.SAMPLE_RATE,
                channels=1,
                callback=self.audio_callback,
            ):
                while self.recording:
                    sd.sleep(1000)
        finally:
            if self.recording:
                # The stream ended without "Stop Recording" (e.g. it failed
                # to open): reset the state so recording can be restarted.
                self.recording = False
                self.after(0, self.update_recording_indicator, False)
            self.save_recording()

    def audio_callback(self, indata, frames, time, status):
        """Store a copy of each captured audio block."""
        self.recording_data.append(indata.copy())

    def save_recording(self):
        """Write the captured audio to a WAV file and open TranscriptionPage.

        Does nothing except print a notice when no audio was captured.
        """
        if not self.recording_data:
            # np.concatenate raises on an empty list; there is nothing to
            # save or transcribe in that case.
            print("No audio was captured, nothing to save.")
            return

        date_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.app_data.lecture_filename = f"lecture_{date_time_str}"
        audio_filepath = f"recordings/recording_{self.app_data.lecture_filename}.wav"

        sf.write(audio_filepath, np.concatenate(self.recording_data, axis=0), self.SAMPLE_RATE)

        self.app.frames[TranscriptionPage].start_transcription_process(audio_filepath)
        self.app.show_frame(TranscriptionPage)

    def skip_recording_page(self):
        """Open TranscriptionPage without a recording."""
        self.app.frames[TranscriptionPage].start_transcription_process("skipped")
        self.app.show_frame(TranscriptionPage)

    def update_recording_indicator(self, is_recording):
        """Update the recording indicator based on the recording state."""
        if is_recording:
            self.recording_indicator.config(text="Recording: ON", fg="green")
        else:
            self.recording_indicator.config(text="Recording: OFF", fg="red")

View file

@ -0,0 +1,136 @@
import tkinter as tk
from tkinter import ttk
import threading
import whisper
import torch
import gc
from .keywords_page import KeywordsPage
class TranscriptionPage(tk.Frame):
    """Page that transcribes the recording and lets the user edit the text.

    Transcription runs on a background thread using Whisper; the editable
    result is passed on to ``KeywordsPage`` when "Extract Keywords" is
    pressed.
    """

    def __init__(self, parent, app_data):
        """Build the page widgets and key bindings.

        Args:
            parent: Owning application object; it must provide ``frames`` and
                ``show_frame``.
            app_data: Shared state object providing the fonts.
        """
        super().__init__(parent)
        self.app = parent
        self.app_data = app_data

        self.grid_columnconfigure(0, weight=1)
        # NOTE(review): the weight goes to the button row (7), not to the
        # text area row (6) - confirm this is the intended resize behaviour.
        self.grid_rowconfigure(7, weight=1)

        self.transcription_description = """Wait until your audio input gets processed.
> You can pass citations to your notes by highlighting piece of text with **text to cite**
> Press <Control-f> when focused in text-area, to find the next match from the current cursor position
> Pressing "Extract Keywords" will save the modified transcription"""

        tk.Label(self, text="Transcription", font=self.app_data.heading_font).grid(
            row=0, column=0, columnspan=2, pady=5, padx=5
        )

        self.audio_filename_label = tk.Label(self, text="No file", font=self.app_data.mono_font)
        self.audio_filename_label.grid(row=1, column=0, pady=4)

        # Created on first use by _show_device_label (row 2).
        self.device_label = None

        tk.Label(
            self,
            text=self.transcription_description,
            font=self.app_data.paragraph_font,
            justify="left",
        ).grid(row=3, column=0, columnspan=2, padx=20)

        self.transcription_textarea = tk.Text(self, wrap="word", font=self.app_data.paragraph_font)
        self.transcription_textarea.grid(row=6, column=0, sticky="nsew", pady=6, padx=6)

        # Search box (initially not displayed)
        self.search_box = tk.Entry(self)
        self.search_tooltip = tk.Label(
            self, text="enter a string and press enter", font=("DejaVu Sans", 10), justify="left"
        )
        self.search_box.bind("<Return>", self.search_text)

        self.transcription_textarea.bind('<Control-a>', select_all)
        self.transcription_textarea.bind('<Control-A>', select_all)

        # Bind Ctrl+F to display the search box
        self.transcription_textarea.bind('<Control-f>', self.display_search_box)
        self.transcription_textarea.bind('<Control-F>', self.display_search_box)

        transcription_scrollbar = tk.Scrollbar(self, command=self.transcription_textarea.yview)
        transcription_scrollbar.grid(row=6, column=1, sticky="ns")
        self.transcription_textarea.config(yscrollcommand=transcription_scrollbar.set)

        tk.Button(self, text="Extract Keywords", command=self.forward_to_keywords_page).grid(
            row=7, column=0, columnspan=2, pady=5
        )

    def display_search_box(self, event):
        """Show the search box above the text area and focus it.

        Returns:
            ``'break'`` so the text widget's own Ctrl+F binding (which moves
            the cursor) does not run as well.
        """
        self.search_box.grid(row=4, column=0, pady=(0, 6), padx=6, sticky="ew")
        self.search_box.focus_set()
        self.search_tooltip.grid(row=5, column=0, sticky="ew")
        return 'break'

    def search_text(self, event):
        """Select the next occurrence of the search box text.

        The search starts at the insert cursor and stops at the end of the
        text. Nothing happens for an empty query; otherwise the search box is
        hidden afterwards whether or not a match was found.
        """
        query = self.search_box.get()
        if not query:
            return

        # Starting position for the search (insert cursor position)
        start_pos = self.transcription_textarea.index(tk.INSERT)
        pos = self.transcription_textarea.search(query, start_pos, tk.END)

        if pos:
            end_pos = f"{pos}+{len(query)}c"  # end of the matched text
            self.transcription_textarea.tag_remove(tk.SEL, "1.0", tk.END)
            self.transcription_textarea.tag_add(tk.SEL, pos, end_pos)
            # Leave the cursor after the match so that searching again finds
            # the following occurrence instead of the same one.
            self.transcription_textarea.mark_set(tk.INSERT, end_pos)
            self.transcription_textarea.see(pos)

        # Hide the search box after search and give the focus back to the
        # text area so typing and Ctrl+F keep working.
        self.search_box.grid_remove()
        self.search_tooltip.grid_remove()
        self.transcription_textarea.focus_set()

    def insert_into_textarea(self, transcription):
        """Replace the text area content with *transcription*.

        The update is scheduled with ``after`` so it can be requested from
        the worker thread.
        """
        def update_text():
            self.transcription_textarea.delete('1.0', tk.END)
            self.transcription_textarea.insert(tk.END, transcription)

        self.transcription_textarea.after(0, update_text)

    def start_transcription_process(self, audio_filepath):
        """Start transcribing *audio_filepath* on a background thread.

        Args:
            audio_filepath: Path of the audio file, or the literal string
                ``"skipped"`` to show an empty text area without transcribing.
        """
        self.audio_filename_label.config(text=f"Filename: {audio_filepath}")

        # Recording was skipped: nothing to transcribe.
        if audio_filepath == "skipped":
            self.insert_into_textarea("")
            return

        self.insert_into_textarea("Transcribing, please wait...")
        threading.Thread(
            target=self.create_transcription, args=(audio_filepath,), daemon=True
        ).start()

    def _show_device_label(self, hw_device):
        """Show which device Whisper was loaded on, reusing a single label."""
        text = "Loaded Whisper on: " + hw_device
        if self.device_label is None:
            self.device_label = tk.Label(self, text=text, font=self.app_data.mono_font)
            self.device_label.grid(row=2, column=0, pady=4)
        else:
            self.device_label.config(text=text)

    def create_transcription(self, audio_filepath):
        """Transcribe *audio_filepath* with Whisper and show the text.

        Runs on the worker thread. On any failure the error is printed and a
        failure message is shown instead of the transcription.
        """
        hw_device = "cpu"
        whisper_model = None
        try:
            hw_device = "cuda" if torch.cuda.is_available() else "cpu"
            whisper_model = whisper.load_model("small", device=hw_device)
            # Widget work is scheduled rather than done on this thread.
            self.after(0, self._show_device_label, hw_device)

            result = whisper_model.transcribe(audio_filepath)
            self.insert_into_textarea(result['text'])
        except Exception as e:
            # Boundary of the worker thread: report and keep the UI usable.
            print(f"Error during transcription: {e}")
            self.insert_into_textarea("Failed to transcribe audio.")
        finally:
            # Drop the model on success and on failure alike, then collect
            # before asking CUDA to release its cached blocks.
            whisper_model = None
            gc.collect()
            if hw_device == 'cuda':
                torch.cuda.empty_cache()

    def forward_to_keywords_page(self):
        """Open KeywordsPage and start keyword extraction on the edited text."""
        transcribed_text = self.transcription_textarea.get("1.0", tk.END)
        self.app.show_frame(KeywordsPage)
        self.app.frames[KeywordsPage].start_kw_extraction_process(transcribed_text)
def select_all(event):
    """Select the entire content of the text widget that received *event*.

    The insert cursor is moved to the very beginning and scrolled into view.

    Returns:
        ``'break'`` so that no further bindings handle the key press.
    """
    target = event.widget
    whole_buffer = ("1.0", tk.END)
    target.tag_add(tk.SEL, *whole_buffer)
    # Park the cursor at the start of the buffer and make it visible.
    target.mark_set(tk.INSERT, whole_buffer[0])
    target.see(tk.INSERT)
    return 'break'