利用NLP实现一个简单的问答系统,类似AI客服
这次项目是我在实习过程中做的,只用了一天时间,大部分代码由AI完成。相关的文本数据放在data.xlsx中,格式说明如下:每行依次为 问题(1个)+ 类似问题(3个,用换行分隔)+ 答案
运行图片
python部分
使用Flask提供网页服务,Scikit-Learn提供机器学习工具,Jieba用于中文文本分词
# Code listing: Flask question-answering service backed by TF-IDF retrieval.
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics.pairwise import cosine_similarity
import jieba
from flask import Flask, request, jsonify, render_template

app = Flask(__name__)

# Reply used when nothing in the knowledge base is similar enough to the query.
FALLBACK_REPLY = "你好,我是智能助手,请问有什么需要我帮助的吗?"
# Minimum cosine similarity the best match must reach to be suggested.
SIMILARITY_THRESHOLD = 0.5
# Number of suggestions returned to the client.
MAX_SUGGESTIONS = 3

# Question/answer records ({'问题': ..., '回答': ...}); filled at start-up.
# Defined at module level so the routes do not raise NameError when the
# module is imported instead of being run as a script.
data_list = []


def find_distinct_top_three_questions(user_query, qa_pairs):
    """Return up to three distinct known questions most similar to the query.

    Questions and the query are segmented with jieba, vectorised with TF-IDF
    and ranked by cosine similarity.

    Args:
        user_query (str): Text typed by the user.
        qa_pairs (list): Question/answer records, either dicts with the keys
            '问题' and '回答' or (question, answer) tuples.

    Returns:
        list[str]: At most three distinct questions, best match first. When
        the query is empty, the knowledge base is empty, or the best
        similarity is below ``SIMILARITY_THRESHOLD``, a single-element list
        holding the fallback greeting.
    """
    if not user_query or not user_query.strip() or not qa_pairs:
        return [FALLBACK_REPLY]

    qa_df = pd.DataFrame(qa_pairs, columns=['问题', '回答'])
    questions = qa_df['问题'].dropna().astype(str).tolist()
    if not questions:
        return [FALLBACK_REPLY]

    # TfidfVectorizer splits on whitespace/punctuation, so Chinese text has to
    # be word-segmented first.
    segmented_questions = [' '.join(jieba.cut(q)) for q in questions]
    segmented_query = ' '.join(jieba.cut(user_query))

    vectorizer = TfidfVectorizer(use_idf=True)
    try:
        question_matrix = vectorizer.fit_transform(segmented_questions)
    except ValueError:
        # Raised when no question yields a usable token (empty vocabulary).
        return [FALLBACK_REPLY]
    query_vector = vectorizer.transform([segmented_query])

    # cosine_similarity returns shape (1, n_questions); keep the single row.
    scores = cosine_similarity(query_vector, question_matrix)[0]

    # Only the best score decides whether anything is worth suggesting.
    if scores.max() < SIMILARITY_THRESHOLD:
        return [FALLBACK_REPLY]

    distinct_top_questions = []
    for index in scores.argsort()[::-1]:
        candidate = questions[index]
        if candidate in distinct_top_questions:
            continue
        distinct_top_questions.append(candidate)
        if len(distinct_top_questions) == MAX_SUGGESTIONS:
            break
    return distinct_top_questions


@app.route('/')
def home():
    """Serve the chat page."""
    return render_template('index.html')


@app.route('/query', methods=['POST'])
def query():
    """Answer a query posted as the form field ``query``.

    Returns a JSON list: the stored answer when the query exactly matches a
    known question (ignoring surrounding whitespace), otherwise the suggested
    questions from ``find_distinct_top_three_questions``.
    """
    # A missing form field yields None; treat it like an empty query.
    user_query = request.form.get('query') or ''
    wanted = user_query.strip()
    for item in data_list:
        if wanted == str(item['问题']).strip():
            return jsonify([item['回答']])
    return jsonify(find_distinct_top_three_questions(user_query, data_list))


def load_qa_pairs(path='data.xlsx'):
    """Read the spreadsheet into a list of {'问题', '回答'} records.

    Each row is read by position: column 0 is the question, column 1 holds
    newline-separated similar questions, column 2 is the answer. Every
    non-blank question variant gets its own record sharing the row's answer.
    Rows without a question or an answer are skipped.
    """
    sheet = pd.read_excel(path)
    pairs = []
    for _, row in sheet.iterrows():
        # .iloc: positional access; row[0] on a labelled Series is deprecated.
        question, similar, answer = row.iloc[0], row.iloc[1], row.iloc[2]
        if pd.isna(question) or pd.isna(answer):
            continue
        variants = [str(question)]
        if not pd.isna(similar):
            variants.extend(str(similar).split('\n'))
        for variant in variants:
            cleaned = variant.strip()
            if cleaned:
                pairs.append({'问题': cleaned, '回答': str(answer)})
    return pairs


if __name__ == '__main__':
    data_list = load_qa_pairs('data.xlsx')

    # NOTE: this classifier is trained but not used by any route above.
    # zip(*data_list) on a list of dicts would yield the dict keys, so the
    # training texts and labels are extracted explicitly.
    if data_list:
        texts = [' '.join(jieba.cut(item['问题'])) for item in data_list]
        labels = [item['回答'] for item in data_list]
        model = make_pipeline(TfidfVectorizer(), MultinomialNB())
        model.fit(texts, labels)

    # NOTE(review): debug=True enables the interactive debugger; it must not
    # be exposed outside local development.
    app.run(debug=True)
网页部分
这里真的,全是AI写的,只能说AI真的好用
<!DOCTYPE html>
<!-- Code listing: templates/index.html, the chat page served by the Flask app. -->
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>智能问答系统</title>
    <style>
        body {
            margin: 0;
            padding-bottom: 70px; /* keeps the last bubble clear of the fixed form */
            display: flex;
            flex-direction: column;
            min-height: 100vh;
        }
        h1 {
            text-align: center;
            background-color: #f0f0f0;
            padding: 10px 0;
            margin-bottom: 0;
            position: sticky;
            top: 0;
            z-index: 100;
        }
        #results {
            flex-grow: 1;
            overflow-y: auto;
            padding: 20px;
            box-sizing: border-box;
            display: flex;
            flex-direction: column;
            gap: 10px;
        }
        .chat-bubble {
            padding: 10px;
            border-radius: 5px;
            position: relative;
            word-wrap: break-word;
            max-width: 70%;
        }
        .user-bubble {
            background-color: #DDEAFD;
            align-self: flex-end;
        }
        .bot-bubble {
            background-color: #007BFF;
            color: white;
            align-self: flex-start;
        }
        .bubble-arrow::before {
            content: "";
            position: absolute;
            width: 0;
            height: 0;
            border-style: solid;
        }
        .user-bubble .bubble-arrow::before {
            right: 0;
            border-width: 10px 0 10px 10px;
            border-color: transparent transparent transparent #DDEAFD;
            bottom: calc(50% - 10px);
            transform: translateY(50%);
        }
        .bot-bubble .bubble-arrow::before {
            left: 0;
            border-width: 10px 10px 10px 0;
            border-color: transparent #007BFF transparent transparent;
            bottom: calc(50% - 10px);
            transform: translateY(50%);
        }
        form {
            position: fixed;
            bottom: 0;
            width: 100%;
            background-color: #f0f0f0;
            padding: 10px 20px;
            box-sizing: border-box;
            display: flex;
            justify-content: center;
            align-items: center;
        }
        input[type="text"] {
            width: 70%;
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 5px;
        }
        button {
            padding: 10px 20px;
            border: none;
            background-color: #007BFF;
            color: white;
            border-radius: 5px;
            cursor: pointer;
            margin-left: 10px;
        }
        button:hover {
            background-color: #0056b3;
        }
    </style>
</head>
<body>
    <h1>智能系统</h1>
    <div id="results"></div>
    <form id="queryForm">
        <input type="text" id="userQuery" name="query" placeholder="请输入您的问题...">
        <button type="submit">提交</button>
    </form>
    <script>
        // Looked up once and shared by the submit handler and the observer.
        const resultsDiv = document.getElementById('results');
        const queryInput = document.getElementById('userQuery');

        // Copy the text of a suggested question into the input box.
        function sendOption(optionText) {
            queryInput.value = optionText;
        }

        // Build a chat bubble. `content` may be a string or a DOM node;
        // strings are inserted as text nodes, never parsed as HTML, so user
        // input and server data cannot inject markup.
        function createBubble(extraClass, content) {
            const bubble = document.createElement('div');
            bubble.className = `chat-bubble ${extraClass}`;
            const arrow = document.createElement('span');
            arrow.className = 'bubble-arrow';
            bubble.appendChild(arrow);
            bubble.append(content);
            return bubble;
        }

        // A single observer keeps the newest bubble scrolled into view.
        const observer = new MutationObserver(() => {
            resultsDiv.scrollTop = resultsDiv.scrollHeight;
        });
        observer.observe(resultsDiv, { childList: true, subtree: true });

        document.getElementById('queryForm').addEventListener('submit', function (event) {
            event.preventDefault();
            const userQuery = queryInput.value;

            fetch('/query', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                },
                body: new URLSearchParams({ query: userQuery }),
            })
                .then(response => {
                    if (!response.ok) {
                        throw new Error(`HTTP ${response.status}`);
                    }
                    return response.json();
                })
                .then(data => {
                    resultsDiv.appendChild(createBubble('user-bubble', userQuery));

                    if (Array.isArray(data)) {
                        data.forEach((option, index) => {
                            const optionButton = document.createElement('button');
                            optionButton.type = 'button';
                            optionButton.textContent = option;
                            // A listener instead of an inline onclick string:
                            // quotes inside the option text cannot break it.
                            optionButton.addEventListener('click', () => sendOption(option));

                            const optionElement = createBubble('bot-bubble option', optionButton);
                            optionElement.setAttribute('data-index', index);
                            resultsDiv.appendChild(optionElement);
                        });
                    } else {
                        resultsDiv.appendChild(createBubble('bot-bubble', String(data)));
                    }

                    queryInput.value = '';
                    resultsDiv.scrollTop = resultsDiv.scrollHeight;
                })
                .catch(error => console.error('Error:', error));
        });
    </script>
</body>
</html>