feat(nlp): improve exploitable phrases
Signed-off-by: Sphericalkat <me@kat.bio>
parent dd1e11bd45
commit 8e8bed9813
main.py (20 lines changed)
@@ -1,4 +1,5 @@
 import logging
+import random
 
 from telegram import Update
 from telegram.ext import (
@@ -9,7 +10,7 @@ from telegram.ext import (
     filters,
 )
 
-from nlp import is_noun_follows_verb
+import nlp
 from settings import Settings
 
 # Load environment variables
@@ -29,23 +30,18 @@ async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
 
 
 async def message_handler(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    # Get the message text content
-    msg_content = update.effective_message.text
+    msg = update.effective_message
+    msg_content = msg.text
 
-    # Ignore messages without text
     if not msg_content:
         return
 
-    # Check that the message doesn't have more than 5 words
-    if len(msg_content.split()) > 5:
-        return
+    # generate exploitable phrases
+    exploitable_phrases = nlp.find_exploitable_phrases(msg_content)
 
-    # Check if a noun immediately follows a verb
-    is_follows_verb, verb = is_noun_follows_verb(msg_content)
-    if is_follows_verb:
-        await update.effective_message.reply_text(f"{verb} deez")
+    phrase = random.choice(exploitable_phrases)
 
-        return
+    await msg.reply_text(phrase)
 
 
 if __name__ == "__main__":
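For context, message_handler is presumably wired up inside the `if __name__ == "__main__":` block that this hunk leaves untouched. A minimal sketch of that wiring with python-telegram-bot v20+, reusing the names already imported in main.py; the Settings().bot_token attribute below is a hypothetical stand-in for wherever the token actually lives:

from telegram.ext import ApplicationBuilder, MessageHandler, filters

# Build the bot application; the token attribute name is an assumption.
app = ApplicationBuilder().token(Settings().bot_token).build()

# Route plain text messages (but not commands) to the handler shown above.
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, message_handler))

app.run_polling()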
nlp.py (65 lines changed)
@@ -7,6 +7,7 @@ nlp = spacy.load("en_core_web_sm")
 def is_noun_follows_verb(text: str) -> bool:
     doc = nlp(text)
     for i in range(len(doc) - 1):
+        print(doc[i].pos_, doc[i].text, doc[i + 1].pos_, doc[i + 1].text)
         # Check if the current token is a verb and the next token is a noun
         if doc[i].pos_ == "VERB" and doc[i + 1].pos_ in ["NOUN", "PROPN", "PRON"]:
             return True, doc[i].lemma_
@@ -14,10 +15,62 @@ def is_noun_follows_verb(text: str) -> bool:
     return False, None
 
 
-if __name__ == "__main__":
-    text = "I was eating pizza"
-    is_follows_verb, verb = is_noun_follows_verb(text)
-    if is_follows_verb:
-        print(f"{verb} deez")
-    else:
-        print("No noun follows a verb in the text")
+def find_exploitable_phrases(sentence):
+    # Parse the sentence using spaCy
+    doc = nlp(sentence)
+    exploitable_phrases = []
+
+    for token in doc:
+        if token.pos_ == "VERB":
+            # Collect the verb and its relevant preceding words
+            phrase = [token.lemma_]
+            preceding_tokens = []
+
+            # Collect adverbs and prepositions that are syntactically dependent on the verb
+            # eg: I am testing for bugs. -> "for" is dependent on "testing"
+            for child in token.children:
+                if child.dep_ in {"advmod", "neg", "prep"}:
+                    preceding_tokens.append(child)
+
+            # Sort the preceding tokens by their position in the sentence
+            # this makes it sound natural
+            preceding_tokens = sorted(preceding_tokens, key=lambda x: x.i)
+
+            # Add the sorted preceding tokens to the phrase
+            # depending on certain conditions
+            for t in preceding_tokens:
+                # if the token is a preposition, add the preposition and its dependent to the phrase
+                if t.dep_ == "prep":
+                    phrase.append(t.text)
+
+                    # if the preposition has a dependent which is a
+                    # prepositional complement, add the dependent to the phrase
+                    for subchild in t.children:
+                        if subchild.dep_ in {"pcomp"}:
+                            phrase.append(subchild.text)
+                # otherwise, add the token to the beginning of the phrase
+                else:
+                    phrase.insert(0, t.text)
+
+            phrase_text = " ".join(phrase)
+            exploitable_phrases.append(phrase_text)
+
+    return exploitable_phrases
+
+
+if __name__ == "__main__":
+    # Test the function
+    sentences = [
+        "I am testing for bugs.",
+        "She was speaking at a conference.",
+        "He is looking into the issue.",
+        "They are working on the project.",
+        "Apple is looking at buying U.K. startup for $1 billion",
+    ]
+
+    for sentence in sentences:
+        phrases = find_exploitable_phrases(sentence)
+        for phrase in phrases:
+            print(f"Original phrase: {sentence}")
+            print(f"Exploitable phrase: {phrase} deez")
+            print()
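The new find_exploitable_phrases helper leans entirely on spaCy's dependency parse: a verb token's children carry labels such as advmod, neg, prep, and pcomp, and token.i preserves the original word order used for sorting. A minimal sketch that dumps those attributes for one of the test sentences above, which can help when tuning which dependency labels the function keeps:

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("I am testing for bugs.")

for token in doc:
    if token.pos_ == "VERB":
        # Print the verb's lemma and each syntactic child with its dependency label.
        print(token.text, "->", token.lemma_)
        for child in token.children:
            print(f"  child: {child.text!r} dep={child.dep_} pos={child.pos_} i={child.i}")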