diff --git a/main.py b/main.py index b42f41d..4d4be19 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import logging +import random from telegram import Update from telegram.ext import ( @@ -9,7 +10,7 @@ from telegram.ext import ( filters, ) -from nlp import is_noun_follows_verb +import nlp from settings import Settings # Load environment variables @@ -29,23 +30,18 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE): async def message_handler(update: Update, context: ContextTypes.DEFAULT_TYPE): - # Get the message text content - msg_content = update.effective_message.text + msg = update.effective_message + msg_content = msg.text - # Ignore messages without text if not msg_content: return - - # Check that the message doesn't have more than 5 words - if len(msg_content.split()) > 5: - return - # Check if a noun immediately follows a verb - is_follows_verb, verb = is_noun_follows_verb(msg_content) - if is_follows_verb: - await update.effective_message.reply_text(f"{verb} deez") + # generate exploitable phrases + exploitable_phrases = nlp.find_exploitable_phrases(msg_content) - return + phrase = random.choice(exploitable_phrases) + + await msg.reply_text(phrase) if __name__ == "__main__": diff --git a/nlp.py b/nlp.py index 6e5964c..3a22d9f 100644 --- a/nlp.py +++ b/nlp.py @@ -7,17 +7,70 @@ nlp = spacy.load("en_core_web_sm") def is_noun_follows_verb(text: str) -> bool: doc = nlp(text) for i in range(len(doc) - 1): + print(doc[i].pos_, doc[i].text, doc[i + 1].pos_, doc[i + 1].text) # Check if the current token is a verb and the next token is a noun if doc[i].pos_ == "VERB" and doc[i + 1].pos_ in ["NOUN", "PROPN", "PRON"]: return True, doc[i].lemma_ - + return False, None +def find_exploitable_phrases(sentence): + # Parse the sentence using spaCy + doc = nlp(sentence) + exploitable_phrases = [] + + for token in doc: + if token.pos_ == "VERB": + # Collect the verb and its relevant preceding words + phrase = [token.lemma_] + preceding_tokens = [] + + # Collect adverbs and prepositions that are syntactically dependent on the verb + # eg: I am testing for bugs. -> "for" is dependent on "testing" + for child in token.children: + if child.dep_ in {"advmod", "neg", "prep"}: + preceding_tokens.append(child) + + # Sort the preceding tokens by their position in the sentence + # this makes it sound natural + preceding_tokens = sorted(preceding_tokens, key=lambda x: x.i) + + # Add the sorted preceding tokens to the phrase + # depending on certain conditions + for t in preceding_tokens: + # if the token is a preposition, add the preposition and its dependent to the phrase + if t.dep_ == "prep": + phrase.append(t.text) + + # if the preposition has a dependent which is a + # prepositional complement, add the dependent to the phrase + for subchild in t.children: + if subchild.dep_ in {"pcomp"}: + phrase.append(subchild.text) + # otherwise, add the token to the beginning of the phrase + else: + phrase.insert(0, t.text) + + phrase_text = " ".join(phrase) + exploitable_phrases.append(phrase_text) + + return exploitable_phrases + + if __name__ == "__main__": - text = "I was eating pizza" - is_follows_verb, verb = is_noun_follows_verb(text) - if is_follows_verb: - print(f"{verb} deez") - else: - print("No noun follows a verb in the text") \ No newline at end of file + # Test the function + sentences = [ + "I am testing for bugs.", + "She was speaking at a conference.", + "He is looking into the issue.", + "They are working on the project.", + "Apple is looking at buying U.K. startup for $1 billion", + ] + + for sentence in sentences: + phrases = find_exploitable_phrases(sentence) + for phrase in phrases: + print(f"Original phrase: {sentence}") + print(f"Exploitable phrase: {phrase} deez") + print()