init

2024-04-25 20:34:28 +02:00 · 2024-04-25 20:34:28 +02:00 · 2042af8d20
commit 2042af8d20
parent 0cb2b64509
16 changed files with 1524 additions and 0 deletions
--- a/lsb_package/__init__.py
+++ b/lsb_package/__init__.py
@@ -0,0 +1,4 @@
+from .start_page import StartPage
+from .transcription_page import TranscriptionPage 
+from .keywords_page import KeywordsPage
+from .llm_page import LlmPage
--- a/lsb_package/__pycache__/__init__.cpython-310.pyc
+++ b/lsb_package/__pycache__/__init__.cpython-310.pyc
--- a/lsb_package/__pycache__/keywords_page.cpython-310.pyc
+++ b/lsb_package/__pycache__/keywords_page.cpython-310.pyc
--- a/lsb_package/__pycache__/llm_page.cpython-310.pyc
+++ b/lsb_package/__pycache__/llm_page.cpython-310.pyc
--- a/lsb_package/__pycache__/start_page.cpython-310.pyc
+++ b/lsb_package/__pycache__/start_page.cpython-310.pyc
--- a/lsb_package/__pycache__/transcription_page.cpython-310.pyc
+++ b/lsb_package/__pycache__/transcription_page.cpython-310.pyc
--- a/lsb_package/keywords_page.py
+++ b/lsb_package/keywords_page.py
@@ -0,0 +1,145 @@
+import tkinter as tk
+from tkinter import ttk
+from nltk.stem import WordNetLemmatizer
+from datetime import datetime
+import re
+#from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF
+from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
+from nltk.tokenize import word_tokenize
+from .llm_page import LlmPage
+
+
+class KeywordsPage(tk.Frame):
+    """Page that shows the extracted keywords and lets the user edit them.
+
+    The transcription is saved to ``recordings/transcript_<lecture>.txt``.
+    When the user presses "Generate Notes" the (possibly edited) keywords are
+    prepended to that file and the application switches to ``LlmPage``.
+    """
+
+    # Upper bound on the number of TF-IDF terms offered as keywords.
+    MAX_KEYWORDS = 20
+
+    def __init__(self, parent, app_data):
+        """Build the page widgets.
+
+        Args:
+            parent: Container widget; ``show_frame`` is called on it to
+                switch pages.
+            app_data: Shared state object; this page reads its fonts
+                (``heading_font``, ``mono_font``, ``paragraph_font``) and
+                reads/writes ``lecture_filename`` and ``modified_keywords``.
+        """
+        super().__init__(parent)
+        self.app = parent
+        self.app_data = app_data
+        self.grid_columnconfigure(0, weight=1)
+        # NOTE(review): row 4 (the button row) receives the spare vertical
+        # space while the text area lives in row 3 -- confirm this is intended.
+        self.grid_rowconfigure(4, weight=1)
+        self.keywords_description = 'Here are the extracted keywords. You can modify them to your liking before feeding them into note generation. Keywords will be added to the top of the transcription file.'
+
+        tk.Label(self, text="Keywords", font=self.app_data.heading_font).grid(row=0, column=0, columnspan=2)
+
+        self.lecture_filename_label = tk.Label(self, font=self.app_data.mono_font)
+        self.lecture_filename_label.grid(row=1, column=0, pady=4)
+
+        tk.Label(self, text=self.keywords_description, font=self.app_data.paragraph_font, wraplength=400, justify="left").grid(row=2, column=0, columnspan=2, pady=5, padx=5)
+
+        self.keywords_textarea = tk.Text(self, wrap="word", font=self.app_data.paragraph_font)
+        self.keywords_textarea.grid(row=3, column=0, sticky="nsew", pady=6, padx=6)
+
+        keywords_scrollbar = tk.Scrollbar(self, command=self.keywords_textarea.yview)
+        keywords_scrollbar.grid(row=3, column=1, sticky="ns")
+        self.keywords_textarea.config(yscrollcommand=keywords_scrollbar.set)
+
+        tk.Button(self, text="Generate Notes", command=self.write_kw_and_forward_to_llm_page).grid(row=4, column=0, columnspan=2, pady=5)
+
+    def _transcript_path(self):
+        """Return the transcript file path for the current lecture."""
+        return f"recordings/transcript_{self.app_data.lecture_filename}.txt"
+
+    def write_kw_and_forward_to_llm_page(self):
+        """Prepend the edited keywords to the transcript file and open LlmPage.
+
+        The keywords are also stored on ``app_data.modified_keywords`` so the
+        LLM page can read them.
+        """
+        self.modified_keywords = self.keywords_textarea.get('1.0', tk.END)
+        self.app_data.modified_keywords = self.modified_keywords
+        keywords = f"Transcription keywords:\n\n{self.modified_keywords}\n"
+        filename = self._transcript_path()
+        # Explicit UTF-8: transcriptions may contain characters that the
+        # platform default encoding cannot represent.
+        with open(filename, 'r', encoding="utf-8") as file:
+            transcription = file.read()
+        with open(filename, 'w', encoding="utf-8") as file:
+            file.write(keywords)
+            file.write(transcription)
+        self.app.show_frame(LlmPage)
+
+    def start_kw_extraction_process(self, transcription_text):
+        """Save the transcription to disk and fill the text area with keywords.
+
+        Args:
+            transcription_text: The (possibly user-edited) transcription.
+        """
+        import os
+
+        # No recording was made (e.g. it was skipped): derive a name from now.
+        if self.app_data.lecture_filename is None:
+            date_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+            self.app_data.lecture_filename = f"lecture_{date_time_str}"
+
+        filename = self._transcript_path()
+        self.lecture_filename_label.config(text=filename)
+
+        # The output directory may not exist on a fresh checkout.
+        os.makedirs(os.path.dirname(filename), exist_ok=True)
+        with open(filename, "w", encoding="utf-8") as file:
+            file.write(transcription_text)
+
+        keywords = self.extract_topics(transcription_text)
+        self.keywords_textarea.delete('1.0', tk.END)
+        self.keywords_textarea.insert(tk.END, "\n".join(keywords))
+
+    def extract_topics(self, transcript):
+        """Return the sorted TF-IDF vocabulary of *transcript* as keywords.
+
+        The transcript is split into sentences on ``.``, ``!`` and ``?``; each
+        sentence is lower-cased, stripped of punctuation, tokenized and
+        lemmatized as verbs. A ``TfidfVectorizer`` then selects at most
+        ``MAX_KEYWORDS`` terms. An NMF topic model was tried on top of these
+        features and is disabled; the TF-IDF feature names are returned
+        directly.
+
+        Args:
+            transcript: Full transcription text.
+
+        Returns:
+            A sorted list of keyword strings. The list is empty when the
+            vectorizer cannot build a vocabulary (for example an empty or very
+            short transcript).
+        """
+        lemmatizer = WordNetLemmatizer()
+
+        cleaned_data = []
+        for sentence in re.split(r'[.!?]', transcript):
+            sentence = re.sub(r'[^\w\s]', '', sentence.lower())
+            words = word_tokenize(sentence)
+            cleaned_data.append(' '.join(lemmatizer.lemmatize(word, pos='v') for word in words))
+
+        tfidf_vectorizer = TfidfVectorizer(
+            max_df=0.30,
+            min_df=2,
+            max_features=self.MAX_KEYWORDS,
+            stop_words="english",
+        )
+
+        try:
+            tfidf_vectorizer.fit(cleaned_data)
+        except ValueError:
+            # scikit-learn raises ValueError when no term survives the
+            # stop-word / min_df / max_df filtering; treat it as "no keywords"
+            # instead of aborting the page update.
+            return []
+
+        return sorted(tfidf_vectorizer.get_feature_names_out())
+
--- a/lsb_package/llm_page.py
+++ b/lsb_package/llm_page.py
@@ -0,0 +1,93 @@
+
+from llama_cpp import Llama
+import tkinter as tk
+from tkinter import ttk
+import threading
+
+class LlmPage(tk.Frame):
+    """Page that generates notes from the keywords with a local Llama model."""
+
+    # Location of the GGUF model file and the context window passed to Llama.
+    MODEL_PATH = "/home/jrosh/.local/share/nomic.ai/GPT4All/gpt4all-falcon-newbpe-q4_0.gguf"
+    CONTEXT_SIZE = 2048
+
+    def __init__(self, parent, app_data):
+        """Build the page widgets.
+
+        Args:
+            parent: Container widget, kept as ``self.app``.
+            app_data: Shared state object; this page reads its fonts and
+                ``modified_keywords``.
+        """
+        super().__init__(parent)
+        self.app = parent
+        self.app_data = app_data
+        tk.Label(self, text="Notes", font=self.app_data.heading_font).grid(row=0, column=0, sticky="ew", pady=2, padx=2)
+        tk.Label(self, text="Press the generate button and wait for your notes to generate.", font=self.app_data.paragraph_font, wraplength=400, justify="left").grid(row=1, column=0, sticky="ew", pady=2, padx=2)
+        self.text_widget = tk.Text(self, font=self.app_data.paragraph_font, wrap="word")
+        self.text_widget.grid(row=2, column=0, sticky="nsew", pady=6, padx=6)
+
+        self.start_button = ttk.Button(self, text="Start Operation", command=self.start_llama_operation)
+        self.start_button.grid(row=3, column=0, sticky="ew", pady=2, padx=2)
+
+    def start_llama_operation(self):
+        """Start note generation in a background thread.
+
+        If no keywords have been stored on ``app_data`` a hint is written to
+        the text widget and nothing else happens.
+        """
+        if self.app_data.modified_keywords is None:
+            self.text_widget.insert(tk.END, "Keywords have not been set.")
+            return
+
+        self.text_widget.delete('1.0', tk.END)
+        self.text_widget.insert(tk.END, "Please wait...")
+
+        # The keywords are passed explicitly so the worker does not read
+        # shared state. daemon=True matches the transcription worker and
+        # keeps a running generation from blocking interpreter exit.
+        operation_thread = threading.Thread(
+            target=self.run_llama_operation,
+            args=(self.app_data.modified_keywords,),
+            daemon=True,
+        )
+        operation_thread.start()
+
+    def run_llama_operation(self, llmTopics):
+        """Load the model, run the completion and show the generated text.
+
+        Runs in a worker thread; the widget update is scheduled with
+        ``after`` rather than performed directly.
+
+        Args:
+            llmTopics: Keywords text inserted into the prompt.
+        """
+        try:
+            print("##### Started Llama...")
+            print(llmTopics)
+            llm = Llama(model_path=self.MODEL_PATH, n_ctx=self.CONTEXT_SIZE)
+            output = llm(
+                f"Generate comprehensive, informative and factual descriptions for the provided keywords '{llmTopics}'",
+                # max_tokens <= 0 means no fixed limit: generation is bounded
+                # only by the context window (n_ctx).
+                max_tokens=0,
+            )
+            print(output)
+            # A completion response keeps the generated string under
+            # choices[0]["text"]; pass the text, not the whole choice dict.
+            self.text_widget.after(0, self.update_text_widget, output['choices'][0]['text'])
+            print("##### Llama Finished")
+        except Exception as e:
+            print(f"Error during Llama operation: {e}")
+            self.text_widget.after(0, self.update_text_widget, "An error occurred, please try again.")
+
+    def update_text_widget(self, content):
+        """Replace the text widget content, if this page still exists."""
+        if self.winfo_exists():
+            self.text_widget.delete('1.0', tk.END)
+            self.text_widget.insert(tk.END, content)
--- a/lsb_package/start_page.py
+++ b/lsb_package/start_page.py
@@ -0,0 +1,86 @@
+import tkinter as tk
+from tkinter import ttk
+from datetime import datetime
+import sounddevice as sd
+import soundfile as sf
+import threading
+import numpy as np
+from .transcription_page import TranscriptionPage
+
+class StartPage(tk.Frame):
+    """First page: pick an input device and record the lecture audio."""
+
+    # Sample rate used both for capturing and for writing the WAV file.
+    SAMPLE_RATE = 16000
+
+    def __init__(self, parent, app_data):
+        """Build the page widgets and list the available input devices.
+
+        Args:
+            parent: Container widget; its ``frames`` mapping and
+                ``show_frame`` method are used to switch pages.
+            app_data: Shared state object; this page reads its fonts and
+                writes ``lecture_filename``.
+        """
+        super().__init__(parent)
+        self.app = parent
+        self.app_data = app_data
+        self.recording = False
+        self.recording_data = []
+
+        tk.Label(self, text="Lecture Summary Bot", font=self.app_data.heading_font).pack(pady=5)
+        self.start_page_description = tk.Label(self, text="Start by recording your lecture using selected input device.", font=self.app_data.paragraph_font, wraplength=400, justify="left")
+        self.start_page_description.pack(pady=5)
+
+        # Map device name -> device index for every device that can record.
+        self.input_devices = {
+            device['name']: index
+            for index, device in enumerate(sd.query_devices())
+            if device['max_input_channels'] > 0
+        }
+
+        # Dropdown for device selection
+        self.device_var = tk.StringVar()
+        device_names = list(self.input_devices)
+        self.device_menu = ttk.Combobox(self, values=device_names, textvariable=self.device_var)
+        if device_names:
+            # Preselect "default" only when such a device is really listed;
+            # otherwise fall back to the first input device so that the
+            # preselected name can always be resolved.
+            self.device_var.set("default" if "default" in device_names else device_names[0])
+        self.device_menu.pack(pady=10)
+
+        ttk.Button(self, text="Start Recording", command=self.start_recording).pack(pady=5)
+        ttk.Button(self, text="Stop Recording", command=self.stop_recording).pack(pady=5)
+        ttk.Button(self, text="Skip Recording", command=self.skip_recording_page).pack(pady=5)
+
+        # Recording indicator
+        self.recording_indicator = tk.Label(self, text="Recording: OFF", fg="red")
+        self.recording_indicator.pack(pady=5)
+
+    def start_recording(self):
+        """Resolve the selected device and start the recorder thread."""
+        if self.recording:
+            return
+
+        selected = self.device_var.get()
+        device_index = self.input_devices.get(selected)
+        if device_index is None:
+            # The combobox is editable: let sounddevice resolve typed text.
+            try:
+                device_index = sd.query_devices().index(sd.query_devices(selected))
+            except (ValueError, sd.PortAudioError) as error:
+                # Resolve the device before touching any state so a bad name
+                # cannot leave the page stuck in "recording" mode.
+                print(f"Cannot use input device {selected!r}: {error}")
+                return
+
+        self.recording = True
+        self.update_recording_indicator(True)
+        self.recording_data = []
+        threading.Thread(target=self.record_audio, args=(device_index,)).start()
+
+    def stop_recording(self):
+        """Ask the recorder thread to stop; it saves the audio on exit."""
+        if self.recording:
+            self.recording = False
+            self.update_recording_indicator(False)
+
+    def record_audio(self, device_index):
+        """Capture audio until ``self.recording`` is cleared, then save it.
+
+        Runs in a worker thread.
+
+        Args:
+            device_index: Index of the input device to open.
+        """
+        try:
+            with sd.InputStream(device=device_index, samplerate=self.SAMPLE_RATE, channels=1, callback=self.audio_callback):
+                while self.recording:
+                    sd.sleep(1000)
+        finally:
+            # Also reached when the stream could not be opened: reset the
+            # state so a new recording can be started afterwards.
+            self.recording = False
+            self.after(0, self.update_recording_indicator, False)
+            self.save_recording()
+
+    def audio_callback(self, indata, frames, time, status):
+        """Stream callback: keep a copy of every captured buffer."""
+        if status:
+            print(status)
+        self.recording_data.append(indata.copy())
+
+    def save_recording(self):
+        """Write the captured audio to a WAV file and open the next page.
+
+        Does nothing when no audio was captured. The page switch is scheduled
+        with ``after`` because this method is called from the recorder thread.
+        """
+        import os
+
+        if not self.recording_data:
+            # np.concatenate raises on an empty list; there is nothing to save.
+            print("No audio was captured; nothing to save.")
+            return
+
+        date_time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+        self.app_data.lecture_filename = f"lecture_{date_time_str}"
+        audio_filepath = f"recordings/recording_{self.app_data.lecture_filename}.wav"
+        os.makedirs(os.path.dirname(audio_filepath), exist_ok=True)
+        sf.write(audio_filepath, np.concatenate(self.recording_data, axis=0), self.SAMPLE_RATE)
+
+        self.after(0, self._open_transcription_page, audio_filepath)
+
+    def _open_transcription_page(self, audio_filepath):
+        """Start transcription of *audio_filepath* and show that page."""
+        self.app.frames[TranscriptionPage].start_transcription_process(audio_filepath)
+        self.app.show_frame(TranscriptionPage)
+
+    def skip_recording_page(self):
+        """Go to the transcription page without recording anything."""
+        self._open_transcription_page("skipped")
+
+    def update_recording_indicator(self, is_recording):
+        """Update the recording indicator based on the recording state."""
+        if is_recording:
+            self.recording_indicator.config(text="Recording: ON", fg="green")
+        else:
+            self.recording_indicator.config(text="Recording: OFF", fg="red")
--- a/lsb_package/transcription_page.py
+++ b/lsb_package/transcription_page.py
@@ -0,0 +1,136 @@
+import tkinter as tk
+from tkinter import ttk
+import threading
+import whisper
+import torch
+import gc
+from .keywords_page import KeywordsPage
+
+
+class TranscriptionPage(tk.Frame):
+    """Page that transcribes the recording with Whisper and lets the user edit it."""
+
+    def __init__(self, parent, app_data):
+        """Build the page widgets and key bindings.
+
+        Args:
+            parent: Container widget; its ``frames`` mapping and
+                ``show_frame`` method are used to switch pages.
+            app_data: Shared state object; this page reads its fonts.
+        """
+        super().__init__(parent)
+        self.app = parent
+        self.app_data = app_data
+        self.grid_columnconfigure(0, weight=1)
+        self.grid_rowconfigure(7, weight=1)
+        self.transcription_description = """Wait until your audio input gets processed.
+> You can pass citations to your notes by highlighting piece of text with **text to cite**
+> Press <Control-f> when focused in text-area, to find the next match from the current cursor position
+> Pressing "Extract Keywords" will save the modified transcription"""
+
+        tk.Label(self, text="Transcription", font=self.app_data.heading_font).grid(row=0, column=0, columnspan=2, pady=5, padx=5)
+
+        self.audio_filename_label = tk.Label(self, text="No file", font=self.app_data.mono_font)
+        self.audio_filename_label.grid(row=1, column=0, pady=4)
+
+        # Shows which hardware Whisper was loaded on; gridded (row 2) once a
+        # model has been loaded and reused for later transcriptions.
+        self.device_label = tk.Label(self, font=self.app_data.mono_font)
+
+        tk.Label(self, text=self.transcription_description, font=self.app_data.paragraph_font, justify="left").grid(row=3, column=0, columnspan=2, padx=20)
+
+        self.transcription_textarea = tk.Text(self, wrap="word", font=self.app_data.paragraph_font)
+        self.transcription_textarea.grid(row=6, column=0, sticky="nsew", pady=6, padx=6)
+
+        # Search box (initially not displayed)
+        self.search_box = tk.Entry(self)
+        self.search_tooltip = tk.Label(self, text="enter a string and press enter", font=("DejaVu Sans", 10), justify="left")
+        self.search_box.bind("<Return>", self.search_text)
+
+        self.transcription_textarea.bind('<Control-a>', select_all)
+        self.transcription_textarea.bind('<Control-A>', select_all)
+
+        # Bind Ctrl+F to display the search box
+        self.transcription_textarea.bind('<Control-f>', self.display_search_box)
+        self.transcription_textarea.bind('<Control-F>', self.display_search_box)
+
+        transcription_scrollbar = tk.Scrollbar(self, command=self.transcription_textarea.yview)
+        transcription_scrollbar.grid(row=6, column=1, sticky="ns")
+        self.transcription_textarea.config(yscrollcommand=transcription_scrollbar.set)
+
+        tk.Button(self, text="Extract Keywords", command=self.forward_to_keywords_page).grid(row=7, column=0, columnspan=2, pady=5)
+
+    def display_search_box(self, event):
+        """Show the search entry above the text area and focus it."""
+        self.search_box.grid(row=4, column=0, pady=(0, 6), padx=6, sticky="ew")
+        self.search_box.focus_set()
+        self.search_tooltip.grid(row=5, column=0, sticky="ew")
+        # Stop Tk's default Control-f binding of the Text widget (move the
+        # insert cursor one character forward) from running as well.
+        return "break"
+
+    def search_text(self, event):
+        """Select the next occurrence of the query after the insert cursor.
+
+        The search box is hidden afterwards, whether or not a match was found.
+        """
+        query = self.search_box.get()
+        if not query:
+            return
+
+        textarea = self.transcription_textarea
+        # Search from the insert cursor to the end of the text (no wrap-around).
+        pos = textarea.search(query, textarea.index(tk.INSERT), tk.END)
+
+        if pos:
+            end_pos = f"{pos}+{len(query)}c"
+            textarea.tag_remove(tk.SEL, "1.0", tk.END)
+            textarea.tag_add(tk.SEL, pos, end_pos)
+            # Put the cursor behind the match so that repeating the search
+            # advances to the next occurrence instead of finding this one again.
+            textarea.mark_set(tk.INSERT, end_pos)
+            textarea.see(pos)
+
+        # Hide the search box after search and hand the focus back, otherwise
+        # it would stay on the now invisible entry.
+        self.search_box.grid_remove()
+        self.search_tooltip.grid_remove()
+        textarea.focus_set()
+
+    def insert_into_textarea(self, transcription):
+        """Replace the text area content; the update is scheduled with ``after``."""
+        def update_text():
+            self.transcription_textarea.delete('1.0', tk.END)
+            self.transcription_textarea.insert(tk.END, transcription)
+        self.transcription_textarea.after(0, update_text)
+
+    def start_transcription_process(self, audio_filepath):
+        """Start transcribing *audio_filepath* in a background thread.
+
+        Args:
+            audio_filepath: Path of the audio file, or the literal string
+                ``"skipped"`` when no recording was made; in that case the
+                text area is simply cleared.
+        """
+        self.audio_filename_label.config(text=f"Filename: {audio_filepath}")
+        if audio_filepath == "skipped":
+            self.insert_into_textarea("")
+            return
+
+        self.insert_into_textarea("Transcribing, please wait...")
+        threading.Thread(target=self.create_transcription, args=(audio_filepath,), daemon=True).start()
+
+    def _show_device_label(self, hw_device):
+        """Display the device Whisper was loaded on."""
+        self.device_label.config(text="Loaded Whisper on: " + hw_device)
+        self.device_label.grid(row=2, column=0, pady=4)
+
+    def create_transcription(self, audio_filepath):
+        """Transcribe *audio_filepath* with Whisper and show the text.
+
+        Runs in a worker thread, so widgets are only updated through
+        ``after``. On any error a failure message is shown instead.
+        """
+        hw_device = "cpu"
+        whisper_model = None
+        try:
+            if torch.cuda.is_available():
+                hw_device = "cuda"
+            whisper_model = whisper.load_model("small", device=hw_device)
+            self.after(0, self._show_device_label, hw_device)
+
+            transcription_text = whisper_model.transcribe(audio_filepath)
+            self.insert_into_textarea(transcription_text['text'])
+        except Exception as e:
+            print(f"Error during transcription: {e}")
+            self.insert_into_textarea("Failed to transcribe audio.")
+        finally:
+            # Drop the model reference on failure as well as on success.
+            del whisper_model
+            if hw_device == 'cuda':
+                torch.cuda.empty_cache()
+            gc.collect()
+
+    def forward_to_keywords_page(self):
+        """Open the keywords page and hand it the current transcription text."""
+        transcribed_text = self.transcription_textarea.get("1.0", tk.END)
+        self.app.show_frame(KeywordsPage)
+        self.app.frames[KeywordsPage].start_kw_extraction_process(transcribed_text)
+
+def select_all(event):
+    """Select all text in the widget that received *event*.
+
+    The insert cursor is moved to the start of the text and scrolled into
+    view. Returns ``'break'`` to the Tk binding machinery.
+    """
+    target = event.widget
+    start = "1.0"
+    target.tag_add(tk.SEL, start, tk.END)
+    target.mark_set(tk.INSERT, start)
+    target.see(tk.INSERT)
+    return 'break'