| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- import logging
- import os
- import random
- from textblob import TextBlob
- from config import FILTER_WORDS
- # See: https://pythonhosted.org/chatbot/
- # See: https://www.codeproject.com/Articles/36106/Chatbot-Tutorial
- # See: https://www.smallsurething.com/implementing-the-famous-eliza-chatbot-in-python/
# Tell NLTK to look for its data in ./nltk_data under the current working directory.
# NOTE(review): this assignment runs after `textblob` has already been imported
# above; confirm NLTK still honours the variable when it is set this late.
os.environ['NLTK_DATA'] = '{}/nltk_data'.format(os.getcwd())

# Module-wide logger: this is the root logger, opened up to DEBUG at import time.
logger = logging.getLogger()
logging.basicConfig()
logger.setLevel(logging.DEBUG)
# Words that count as a greeting from the user, and the replies we choose from
GREETING_KEYWORDS = ("salut", "bonjour", "yo", "hello",)
GREETING_RESPONSES = ["Bonjour monsieur."]


def check_for_greeting(sentence):
    """Return a greeting reply when the input contains a greeting word.

    ``sentence`` must expose a ``words`` iterable of strings. Each word is
    lower-cased and compared against GREETING_KEYWORDS; on the first hit a
    random entry of GREETING_RESPONSES is returned. Returns None when no
    word is a greeting.
    """
    if any(token.lower() in GREETING_KEYWORDS for token in sentence.words):
        return random.choice(GREETING_RESPONSES)
    return None
# Fallback replies for input the bot could not make sense of
NONE_RESPONSES = [
    "Excusez-moi, je ne comprend pas.",
    "Je vous demande pardon?",
]

# Replies for when the user talks about the bot itself
COMMENTS_ABOUT_SELF = [
    "Je ne suis qu'un homme. Attendez... non.",
    "Je ne fais que mon travail Monsieur.",
]
class UnacceptableUtteranceException(Exception):
    """Signals that a candidate reply would have tripped the response blacklist.

    It is raised by the response filter and is meant to propagate uncaught.
    """
def starts_with_vowel(word):
    """Report whether ``word`` begins with a vowel, to pick between 'a' and 'an'.

    The check ignores case, so "Apple" and "apple" are treated alike.

    Args:
        word: the string to inspect; may be empty.

    Returns:
        True if the first character is one of a/e/i/o/u (any case), False
        otherwise -- including for an empty string.
    """
    # Slicing (rather than indexing) keeps an empty word from raising, and the
    # tuple membership test means '' is not mistaken for a vowel.
    return word[:1].lower() in ('a', 'e', 'i', 'o', 'u')
def answer_to(sentence):
    """Entry point for one exchange: log the input and return the bot's reply.

    The reply itself is produced by ``respond``; any exception it raises
    propagates to the caller.
    """
    logger.debug("> respond to %s", sentence)
    return respond(sentence)
def find_pronoun(sent):
    """Choose the pronoun the reply should use, based on the input's pronouns.

    ``sent`` must expose ``pos_tags`` as (word, tag) pairs. Only words tagged
    'PRP' are considered: "you" (any case) maps to 'I', and an exact "I" maps
    to 'You'. Every pair is scanned, so when several pronouns qualify the
    last one in the sentence decides. Returns None if none qualifies.
    """
    chosen = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        if token.lower() == 'you':
            # The user addressed the bot, so the bot answers in first person
            chosen = 'I'
        elif token == 'I':
            # The user talked about themselves, so the bot answers about them
            chosen = 'You'
    return chosen
def find_verb(sent):
    """Return the first verb in the sentence together with its tag.

    ``sent`` must expose ``pos_tags`` as (word, tag) pairs. A word counts as
    a verb when its tag starts with 'VB'. The result is always a 2-tuple:
    (word, tag) for the first verb, or (None, None) when there is none.
    """
    verbs = ((token, tag) for token, tag in sent.pos_tags if tag.startswith('VB'))
    return next(verbs, (None, None))
def find_noun(sent):
    """Return the first word tagged 'NN' in the sentence, or None.

    ``sent`` must expose ``pos_tags`` as (word, tag) pairs. Only the exact
    tag 'NN' matches. A noun that is found (and non-empty) is logged at
    INFO level.
    """
    noun = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
    if noun:
        logger.info("Found noun: %s", noun)
    return noun
def find_adjective(sent):
    """Return the first word tagged 'JJ' in the sentence, or None.

    ``sent`` must expose ``pos_tags`` as (word, tag) pairs. Only the exact
    tag 'JJ' matches.
    """
    for token, tag in sent.pos_tags:
        if tag == 'JJ':
            return token
    return None
def construct_response(pronoun, noun, verb):
    """Assemble a reply from whichever parts of the user's input were found.

    Used when no special case matched. The reply is built from, in order:
    the pronoun, a verb phrase, the noun with an article, and a random
    trailing filler word.

    Args:
        pronoun: pronoun to open the reply with, or None to omit it.
        noun: noun to mention (prefixed with "a"/"an"), or None to omit it.
        verb: a sequence whose first item is the verb word (for example the
            (word, tag) pair from ``find_verb``), or None.

    Returns:
        The parts joined by single spaces. The filler may be the empty
        string, in which case the reply ends with a trailing space.
    """
    parts = []

    if pronoun:
        parts.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a passthrough
    # from the user, or 'you' or 'I', in which case we might need to change the tense for some
    # irregular verbs.
    if verb:
        verb_word = verb[0]
        if verb_word in ('être', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
            # Guard against a missing pronoun: calling .lower() on None would raise.
            if pronoun and pronoun.lower() == 'you':
                parts.append("aren't really")
            else:
                parts.append(verb_word)

    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        parts.append(article + " " + noun)

    parts.append(random.choice(("tho", "bro", "lol", "bruh", "smh", "")))

    return " ".join(parts)
def check_for_comment_about_bot(pronoun, noun, adjective):
    """Build a boastful reply when the user's input was about the bot itself.

    That is the case when ``pronoun`` is 'I' and a noun or an adjective was
    found. With a noun, a coin flip chooses between a template that takes
    the pluralized, capitalized noun (``noun`` must then provide a
    ``pluralize()`` method) and one that takes the noun unchanged. With
    only an adjective, an adjective template is used. Returns None when the
    input was not about the bot.
    """
    if pronoun != 'I' or not (noun or adjective):
        return None

    if not noun:
        return random.choice(SELF_VERBS_WITH_ADJECTIVE).format(adjective=adjective)

    if random.choice((True, False)):
        return random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL).format(
            noun=noun.pluralize().capitalize())
    return random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(noun=noun)


# Templates filled with the pluralized, capitalized form of the noun
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
    "My last startup totally crushed the {noun} vertical",
    "Were you aware I was a serial entrepreneur in the {noun} sector?",
    "My startup is Uber for {noun}",
    "I really consider myself an expert on {noun}",
]

# Templates filled with the noun exactly as the user wrote it
SELF_VERBS_WITH_NOUN_LOWER = [
    "Yeah but I know a lot about {noun}",
    "My bros always ask me about {noun}",
]

# Templates filled with an adjective
SELF_VERBS_WITH_ADJECTIVE = [
    "I'm personally building the {adjective} Economy",
    "I consider myself to be a {adjective}preneur",
]
- # end
def preprocess_text(sentence):
    """Capitalize a lone lower-case 'i' / "i'm" so it can be tagged as a pronoun.

    The input is split on single spaces; tokens that are exactly 'i' or
    "i'm" are replaced by 'I' / "I'm" and everything else is kept verbatim,
    then the tokens are re-joined with single spaces.
    """
    fixes = {'i': 'I', "i'm": "I'm"}
    return ' '.join(fixes.get(token, token) for token in sentence.split(' '))
- # start:example-respond.py
def respond(sentence):
    """Parse the user's inbound sentence and pick the best-fitting reply.

    Candidate replies are tried in this order:
      1. a boast, if the input was about the bot and had a noun or adjective;
      2. a greeting, if the input contained a greeting word;
      3. a "did not understand" line if no pronoun was found, a remark about
         the bot if the input addressed the bot without any verb, or
         otherwise a sentence constructed from the parts that were found;
      4. a "did not understand" line if all of the above produced nothing.

    Args:
        sentence: the raw text typed by the user.

    Returns:
        The reply string.

    Raises:
        UnacceptableUtteranceException: if the chosen reply is rejected by
            ``filter_response``.
    """
    cleaned = preprocess_text(sentence)
    parsed = TextBlob(cleaned)

    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # `verb` is a (word, tag) pair that is truthy even when it is (None, None),
    # so look at the word itself to tell whether a verb was actually found.
    verb_word = verb[0] if verb else None

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # If we didn't override the final sentence, try to construct a new one:
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not verb_word:
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    logger.info("Returning phrase '%s'", resp)
    # Check that we're not going to say anything obviously offensive
    filter_response(resp)

    return resp
def find_candidate_parts_of_speech(parsed):
    """Collect the pronoun, noun, adjective and verb candidates for the input.

    ``parsed`` must expose a ``sentences`` iterable. Each sentence is
    analysed in turn and its results replace those of the sentence before
    it, so the values returned are those of the last sentence. The final
    candidates are logged at INFO level.

    Returns:
        A (pronoun, noun, adjective, verb) tuple. ``verb`` is whatever
        ``find_verb`` returned; every item is None when there were no
        sentences at all.
    """
    candidates = (None, None, None, None)
    for sent in parsed.sentences:
        candidates = (
            find_pronoun(sent),
            find_noun(sent),
            find_adjective(sent),
            find_verb(sent),
        )
    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", *candidates)
    return candidates
def filter_response(resp):
    """Reject a reply that contains forbidden content.

    The reply is split on single spaces and each token is checked. A token
    is forbidden if it contains '@', '#' or '!', or if its lower-cased form
    starts with any entry of FILTER_WORDS.

    Returns:
        None when the reply is acceptable.

    Raises:
        UnacceptableUtteranceException: on the first forbidden token.
    """
    for token in resp.split(' '):
        if any(symbol in token for symbol in ('@', '#', '!')):
            raise UnacceptableUtteranceException()
        if any(token.lower().startswith(prefix) for prefix in FILTER_WORDS):
            raise UnacceptableUtteranceException()
- # end
if __name__ == '__main__':
    # Interactive console session: answer every line typed by the user.
    # A line consisting of "." is still answered, and then ends the session.
    print("Oui monsieur?")
    while True:
        msg = input("> ")
        print(answer_to(msg))
        if msg == ".":
            break
|