bot_1.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. import logging
  2. import os
  3. import random
  4. from textblob import TextBlob
  5. from config import FILTER_WORDS
  6. # See: https://pythonhosted.org/chatbot/
  7. # See: https://www.codeproject.com/Articles/36106/Chatbot-Tutorial
  8. # See: https://www.smallsurething.com/implementing-the-famous-eliza-chatbot-in-python/
  9. os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'
  10. logging.basicConfig()
  11. logger = logging.getLogger()
  12. logger.setLevel(logging.DEBUG)
  13. # Sentences we'll respond with if the user greeted us
  14. GREETING_KEYWORDS = ("salut", "bonjour", "yo", "hello",)
  15. GREETING_RESPONSES = ["Bonjour monsieur."]
  16. def check_for_greeting(sentence):
  17. """If any of the words in the user's input was a greeting, return a greeting response"""
  18. for word in sentence.words:
  19. if word.lower() in GREETING_KEYWORDS:
  20. return random.choice(GREETING_RESPONSES)
  21. # Sentences we'll respond with if we have no idea what the user just said
  22. NONE_RESPONSES = [
  23. "Excusez-moi, je ne comprend pas.",
  24. "Je vous demande pardon?"
  25. ]
  26. # If the user tries to tell us something about ourselves, use one of these responses
  27. COMMENTS_ABOUT_SELF = [
  28. "Je ne suis qu'un homme. Attendez... non.",
  29. "Je ne fais que mon travail Monsieur.",
  30. ]
  31. class UnacceptableUtteranceException(Exception):
  32. """Raise this (uncaught) exception if the response was going to trigger our blacklist"""
  33. pass
  34. def starts_with_vowel(word):
  35. """Check for pronoun compability -- 'a' vs. 'an'"""
  36. return True if word[0] in 'aeiou' else False
  37. def answer_to(sentence):
  38. """Main program loop: select a response for the input sentence and return it"""
  39. logger.debug("> respond to %s", sentence)
  40. resp = respond(sentence)
  41. return resp
  42. def find_pronoun(sent):
  43. """Given a sentence, find a preferred pronoun to respond with. Returns None if no candidate
  44. pronoun is found in the input"""
  45. pronoun = None
  46. for word, part_of_speech in sent.pos_tags:
  47. # Disambiguate pronouns
  48. if part_of_speech == 'PRP' and word.lower() == 'you':
  49. pronoun = 'I'
  50. elif part_of_speech == 'PRP' and word == 'I':
  51. # If the user mentioned themselves, then they will definitely be the pronoun
  52. pronoun = 'You'
  53. return pronoun
  54. def find_verb(sent):
  55. """Pick a candidate verb for the sentence."""
  56. verb = None
  57. pos = None
  58. for word, part_of_speech in sent.pos_tags:
  59. if part_of_speech.startswith('VB'): # This is a verb
  60. verb = word
  61. pos = part_of_speech
  62. break
  63. return verb, pos
  64. def find_noun(sent):
  65. """Given a sentence, find the best candidate noun."""
  66. noun = None
  67. if not noun:
  68. for w, p in sent.pos_tags:
  69. if p == 'NN': # This is a noun
  70. noun = w
  71. break
  72. if noun:
  73. logger.info("Found noun: %s", noun)
  74. return noun
  75. def find_adjective(sent):
  76. """Given a sentence, find the best candidate adjective."""
  77. adj = None
  78. for w, p in sent.pos_tags:
  79. if p == 'JJ': # This is an adjective
  80. adj = w
  81. break
  82. return adj
  83. def construct_response(pronoun, noun, verb):
  84. """No special cases matched, so we're going to try to construct a full sentence that uses as much
  85. of the user's input as possible"""
  86. resp = []
  87. if pronoun:
  88. resp.append(pronoun)
  89. # We always respond in the present tense, and the pronoun will always either be a passthrough
  90. # from the user, or 'you' or 'I', in which case we might need to change the tense for some
  91. # irregular verbs.
  92. if verb:
  93. verb_word = verb[0]
  94. if verb_word in ('être', 'am', 'is', "'m"): # This would be an excellent place to use lemmas!
  95. if pronoun.lower() == 'you':
  96. resp.append("aren't really")
  97. else:
  98. resp.append(verb_word)
  99. if noun:
  100. pronoun = "an" if starts_with_vowel(noun) else "a"
  101. resp.append(pronoun + " " + noun)
  102. resp.append(random.choice(("tho", "bro", "lol", "bruh", "smh", "")))
  103. return " ".join(resp)
  104. def check_for_comment_about_bot(pronoun, noun, adjective):
  105. """Check if the user's input was about the bot itself, in which case try to fashion a response
  106. that feels right based on their input. Returns the new best sentence, or None."""
  107. resp = None
  108. if pronoun == 'I' and (noun or adjective):
  109. if noun:
  110. if random.choice((True, False)):
  111. resp = random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL).format(**{'noun': noun.pluralize().capitalize()})
  112. else:
  113. resp = random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(**{'noun': noun})
  114. else:
  115. resp = random.choice(SELF_VERBS_WITH_ADJECTIVE).format(**{'adjective': adjective})
  116. return resp
  117. # Template for responses that include a direct noun which is indefinite/uncountable
  118. SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
  119. "My last startup totally crushed the {noun} vertical",
  120. "Were you aware I was a serial entrepreneur in the {noun} sector?",
  121. "My startup is Uber for {noun}",
  122. "I really consider myself an expert on {noun}",
  123. ]
  124. SELF_VERBS_WITH_NOUN_LOWER = [
  125. "Yeah but I know a lot about {noun}",
  126. "My bros always ask me about {noun}",
  127. ]
  128. SELF_VERBS_WITH_ADJECTIVE = [
  129. "I'm personally building the {adjective} Economy",
  130. "I consider myself to be a {adjective}preneur",
  131. ]
  132. # end
  133. def preprocess_text(sentence):
  134. """Handle some weird edge cases in parsing, like 'i' needing to be capitalized
  135. to be correctly identified as a pronoun"""
  136. cleaned = []
  137. words = sentence.split(' ')
  138. for w in words:
  139. if w == 'i':
  140. w = 'I'
  141. if w == "i'm":
  142. w = "I'm"
  143. cleaned.append(w)
  144. return ' '.join(cleaned)
  145. # start:example-respond.py
  146. def respond(sentence):
  147. """Parse the user's inbound sentence and find candidate terms that make up a best-fit response"""
  148. cleaned = preprocess_text(sentence)
  149. parsed = TextBlob(cleaned)
  150. # Loop through all the sentences, if more than one. This will help extract the most relevant
  151. # response text even across multiple sentences (for example if there was no obvious direct noun
  152. # in one sentence
  153. pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)
  154. # If we said something about the bot and used some kind of direct noun, construct the
  155. # sentence around that, discarding the other candidates
  156. resp = check_for_comment_about_bot(pronoun, noun, adjective)
  157. # If we just greeted the bot, we'll use a return greeting
  158. if not resp:
  159. resp = check_for_greeting(parsed)
  160. if not resp:
  161. # If we didn't override the final sentence, try to construct a new one:
  162. if not pronoun:
  163. resp = random.choice(NONE_RESPONSES)
  164. elif pronoun == 'I' and not verb:
  165. resp = random.choice(COMMENTS_ABOUT_SELF)
  166. else:
  167. resp = construct_response(pronoun, noun, verb)
  168. # If we got through all that with nothing, use a random response
  169. if not resp:
  170. resp = random.choice(NONE_RESPONSES)
  171. logger.info("Returning phrase '%s'", resp)
  172. # Check that we're not going to say anything obviously offensive
  173. filter_response(resp)
  174. return resp
  175. def find_candidate_parts_of_speech(parsed):
  176. """Given a parsed input, find the best pronoun, direct noun, adjective, and verb to match their input.
  177. Returns a tuple of pronoun, noun, adjective, verb any of which may be None if there was no good match"""
  178. pronoun = None
  179. noun = None
  180. adjective = None
  181. verb = None
  182. for sent in parsed.sentences:
  183. pronoun = find_pronoun(sent)
  184. noun = find_noun(sent)
  185. adjective = find_adjective(sent)
  186. verb = find_verb(sent)
  187. logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
  188. return pronoun, noun, adjective, verb
  189. def filter_response(resp):
  190. """Don't allow any words to match our filter list"""
  191. tokenized = resp.split(' ')
  192. for word in tokenized:
  193. if '@' in word or '#' in word or '!' in word:
  194. raise UnacceptableUtteranceException()
  195. for s in FILTER_WORDS:
  196. if word.lower().startswith(s):
  197. raise UnacceptableUtteranceException()
  198. # end
  199. if __name__ == '__main__':
  200. msg = ""
  201. print("Oui monsieur?")
  202. while msg != ".":
  203. msg = input("> ")
  204. print(answer_to(msg))