feat(nlp): improve exploitable phrases

Signed-off-by: Sphericalkat <me@kat.bio>
2024-05-25 17:57:15 +05:30 · 2024-05-25 17:57:15 +05:30 · 8e8bed9813
commit 8e8bed9813
parent dd1e11bd45
2 changed files with 69 additions and 20 deletions
--- a/main.py
+++ b/main.py
@ -1,4 +1,5 @@
 import logging
 import random
 from telegram import Update
 from telegram.ext import (
@ -9,7 +10,7 @@ from telegram.ext import (
    filters,
 )
-from nlp import is_noun_follows_verb
+import nlp
 from settings import Settings
 # Load environment variables
@ -29,23 +30,18 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
 async def message_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    # Get the message text content
+    msg = update.effective_message
-    msg_content = update.effective_message.text
+    msg_content = msg.text
    # Ignore messages without text
    if not msg_content:
        return
-    # Check that the message doesn't have more than 5 words
+    # generate exploitable phrases
-    if len(msg_content.split()) > 5:
+    exploitable_phrases = nlp.find_exploitable_phrases(msg_content)
        return
-    # Check if a noun immediately follows a verb
+    phrase = random.choice(exploitable_phrases)
    is_follows_verb, verb = is_noun_follows_verb(msg_content)
    if is_follows_verb:
        await update.effective_message.reply_text(f"{verb} deez")
-    return
+    await msg.reply_text(phrase)
 if __name__ == "__main__":
--- a/nlp.py
+++ b/nlp.py
@ -7,6 +7,7 @@ nlp = spacy.load("en_core_web_sm")
 def is_noun_follows_verb(text: str) -> bool:
    doc = nlp(text)
    for i in range(len(doc) - 1):
        print(doc[i].pos_, doc[i].text, doc[i + 1].pos_, doc[i + 1].text)
        # Check if the current token is a verb and the next token is a noun
        if doc[i].pos_ == "VERB" and doc[i + 1].pos_ in ["NOUN", "PROPN", "PRON"]:
            return True, doc[i].lemma_
@ -14,10 +15,62 @@ def is_noun_follows_verb(text: str) -> bool:
    return False, None
 def find_exploitable_phrases(sentence: str):
    """Return one short verb-centred phrase for every verb in *sentence*.

    For each token tagged ``VERB`` the phrase is assembled as::

        <adverb / negation children>  <verb lemma>  <preposition children [+ pcomp]>

    Only direct children of the verb with dependency label ``advmod``,
    ``neg`` or ``prep`` are used. A preposition contributes its own text
    followed by the text of any of its children labelled ``pcomp``.

    Args:
        sentence: Raw text to analyse with the module-level ``nlp`` pipeline.

    Returns:
        A list of phrase strings, one per verb, in the order the verbs
        appear in the parsed document. Empty when no verb is found.
    """
    # Parse the sentence using spaCy
    doc = nlp(sentence)
    exploitable_phrases = []
    for token in doc:
        if token.pos_ != "VERB":
            continue
        # Keep only the dependents we care about, ordered by their position in
        # the sentence so the resulting phrase reads naturally.
        # eg: I am testing for bugs. -> "for" is dependent on "testing"
        dependents = sorted(
            (child for child in token.children if child.dep_ in {"advmod", "neg", "prep"}),
            key=lambda child: child.i,
        )
        # Adverbs and negations go in front of the verb. They are appended in
        # sentence order; inserting each one at index 0 would reverse them
        # whenever a verb has more than one such modifier.
        leading = []
        # Prepositions (and their prepositional complements) go after the verb.
        trailing = []
        for dependent in dependents:
            if dependent.dep_ == "prep":
                trailing.append(dependent.text)
                trailing.extend(
                    subchild.text
                    for subchild in dependent.children
                    if subchild.dep_ == "pcomp"
                )
            else:
                leading.append(dependent.text)
        exploitable_phrases.append(" ".join([*leading, token.lemma_, *trailing]))
    return exploitable_phrases
 if __name__ == "__main__":
-    text = "I was eating pizza"
+    # Test the function
-    is_follows_verb, verb = is_noun_follows_verb(text)
+    sentences = [
-    if is_follows_verb:
+        "I am testing for bugs.",
-        print(f"{verb} deez")
+        "She was speaking at a conference.",
-    else:
+        "He is looking into the issue.",
-        print("No noun follows a verb in the text")
+        "They are working on the project.",
        "Apple is looking at buying U.K. startup for $1 billion",
    ]
    for sentence in sentences:
        phrases = find_exploitable_phrases(sentence)
        for phrase in phrases:
            print(f"Original phrase: {sentence}")
            print(f"Exploitable phrase: {phrase} deez")
            print()