本文将探讨如何使用ChatGPT API来实现与浏览器的语音和文本交互。这包括将语音转换为文本(语音识别)以及将文本转换为语音(文本转语音)的功能。通过这种方式,用户可以与浏览器进行自然的对话,而浏览器也能够以语音的形式回应用户。
首先,需要从OpenAI官网获取OPENAI_API_KEY。这是与OpenAI API进行交互所必需的。
在项目中,打开名为ChatGPT.js的JavaScript文件,并在第一行添加API密钥。请注意,在实际应用中,这个密钥应该被加密存储以保证安全。
代码主要使用XMLHttpRequest来向OpenAI发送JSON请求。请求的端点是:
https://api.openai.com/v1/completions
以下是ChatGPT.js的代码示例:
// OpenAI API key; Send() puts it in the Authorization header as a Bearer token.
// Empty by default.
var OPENAI_API_KEY = "";
// Set to true by OnLoad() when the browser exposes window.speechSynthesis;
// TextToSpeech() does nothing while this is false.
var bTextToSpeechSupported = false;
// NOTE(review): no function visible in this file reads or writes this flag —
// confirm whether it is still needed.
var bSpeechInProgress = false;
// The webkitSpeechRecognition instance; created on the first SpeechToText() call.
var oSpeechRecognizer = null;
// The most recent utterance created by TextToSpeech().
var oSpeechSynthesisUtterance = null;
// Voice list returned by speechSynthesis.getVoices(); filled in by OnLoad().
var oVoices = null;
/**
 * Page-load handler (wired to <body onload>): detects speech support and
 * fills the voice drop-down.
 *
 * - Hides the "listen" label when webkitSpeechRecognition is unavailable.
 * - When speechSynthesis is available, sets bTextToSpeechSupported and loads
 *   the voices into selVoices. Each option's value is the voice's index in
 *   oVoices, which is how TextToSpeech() looks the voice up again.
 */
function OnLoad() {
  if (!("webkitSpeechRecognition" in window)) {
    // Speech recognition is not supported, so hide the control that toggles it.
    lblSpeak.style.display = "none";
  }

  if (!("speechSynthesis" in window)) {
    return;
  }
  bTextToSpeechSupported = true;

  // Rebuilds the drop-down from scratch. Clearing first matters because
  // voiceschanged can fire more than once, and appending each time would
  // leave duplicate entries.
  var fnLoadVoices = function () {
    oVoices = window.speechSynthesis.getVoices();
    selVoices.length = 0;
    for (var i = 0; i < oVoices.length; i++) {
      selVoices.add(new Option(oVoices[i].name, i));
    }
  };

  window.speechSynthesis.onvoiceschanged = fnLoadVoices;
  // Some browsers already have the voices available and never fire
  // voiceschanged, so also load once right away.
  fnLoadVoices();
}
/**
 * Change handler for the language drop-down: copies the selected language
 * code onto the speech recognizer, if one has been created.
 *
 * @param {HTMLSelectElement} o - the element that fired the event (unused;
 *   the value is read from selLang directly).
 */
function ChangeLang(o) {
  if (!oSpeechRecognizer) {
    // Nothing to update until SpeechToText() has created the recognizer.
    return;
  }
  oSpeechRecognizer.lang = selLang.value;
}
/**
 * Extracts the reply text from a parsed OpenAI response object.
 *
 * Handles both response shapes used by Send(): `choices[0].text` (completions
 * endpoint) and `choices[0].message.content` (chat completions endpoint).
 *
 * @param {Object} oJson - the parsed response body.
 * @returns {string} the reply text, or "" when there is no usable choice.
 */
function GetReplyText(oJson) {
  var oChoice = oJson.choices && oJson.choices.length > 0 ? oJson.choices[0] : null;
  if (!oChoice) {
    return "";
  }
  if (oChoice.text) {
    return oChoice.text;
  }
  if (oChoice.message && oChoice.message.content) {
    return oChoice.message.content;
  }
  return "";
}

/**
 * Sends the text in txtMsg to the OpenAI API and appends the exchange to
 * txtOutput.
 *
 * The model comes from selModel. Models whose name contains "gpt-3.5-turbo"
 * are sent to the chat completions endpoint with a `messages` payload; all
 * others go to the completions endpoint with a `prompt` payload. When the
 * response arrives, the reply (or the API's error message) is appended to
 * txtOutput and a non-empty reply is passed to TextToSpeech().
 */
function Send() {
  var sQuestion = txtMsg.value;
  // Treat whitespace-only input like empty input instead of sending it.
  if (sQuestion.trim() === "") {
    alert("请输入问题!");
    txtMsg.focus();
    return;
  }
  spMsg.innerHTML = "ChatGPT正在思考...";

  var sModel = selModel.value;
  var bIsChatModel = sModel.indexOf("gpt-3.5-turbo") != -1;
  var sUrl = bIsChatModel
    ? "https://api.openai.com/v1/chat/completions"
    : "https://api.openai.com/v1/completions";

  var oHttp = new XMLHttpRequest();
  oHttp.open("POST", sUrl);
  oHttp.setRequestHeader("Accept", "application/json");
  oHttp.setRequestHeader("Content-Type", "application/json");
  oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY);

  oHttp.onreadystatechange = function () {
    // 4 = DONE; ignore the intermediate state changes.
    if (oHttp.readyState !== 4) {
      return;
    }
    spMsg.innerHTML = "";
    if (txtOutput.value != "") txtOutput.value += "\n";

    var oJson = {};
    try {
      oJson = JSON.parse(oHttp.responseText);
    } catch (ex) {
      txtOutput.value += "错误: " + ex.message;
    }
    // JSON.parse can legitimately return null or a primitive; normalise so
    // the property lookups below cannot throw.
    if (oJson === null || typeof oJson !== "object") {
      oJson = {};
    }

    if (oJson.error && oJson.error.message) {
      txtOutput.value += "错误: " + oJson.error.message;
    } else if (oJson.choices) {
      var s = GetReplyText(oJson);
      if (selLang.value != "en-US") {
        // For non-English languages, when the reply splits into exactly two
        // parts around "?\n", keep only the second part.
        var a = s.split("?\n");
        if (a.length == 2) {
          s = a[1];
        }
      }
      if (s === "") {
        // Show the placeholder so the user can see the request completed.
        // It is not spoken: the utterance language comes from selLang.
        txtOutput.value += "ChatGPT: 无回应";
      } else {
        txtOutput.value += "ChatGPT: " + s;
        TextToSpeech(s);
      }
    }
  };

  var iMaxTokens = 2048;
  var sUserId = "1";
  var dTemperature = 0.5;
  var data;
  if (bIsChatModel) {
    data = {
      "model": sModel,
      "messages": [
        // A {"role": "system", ...} message could be added here, and
        // "assistant" messages can carry earlier replies.
        {
          "role": "user", // one of: system, user, assistant
          "content": sQuestion
        }
      ]
    };
  } else {
    data = {
      model: sModel,
      prompt: sQuestion,
      max_tokens: iMaxTokens,
      user: sUserId,
      temperature: dTemperature,
      // Number between -2.0 and 2.0; positive values make the model less
      // likely to repeat the same line.
      frequency_penalty: 0.0,
      // Number between -2.0 and 2.0; positive values make the model more
      // likely to talk about new topics.
      presence_penalty: 0.0,
      // Up to 4 sequences at which the API stops generating further tokens;
      // the returned text does not contain the stop sequence.
      stop: [
        "#",
        ";"
      ]
    };
  }
  oHttp.send(JSON.stringify(data));

  // Echo the question into the transcript and clear the input box.
  if (txtOutput.value != "") txtOutput.value += "\n";
  txtOutput.value += ": " + sQuestion;
  txtMsg.value = "";
}
/**
 * Speaks the given text through the browser's speech synthesis.
 *
 * Does nothing when text-to-speech is unsupported or the mute box is checked.
 * Uses the voice selected in selVoices (when the voice list has been loaded)
 * and the language selected in selLang. While "listen" is checked, the
 * recognizer is stopped before speaking and restarted when speech ends.
 *
 * @param {string} s - the text to speak.
 */
function TextToSpeech(s) {
  if (bTextToSpeechSupported == false || chkMute.checked) {
    return;
  }

  var oUtterance = new SpeechSynthesisUtterance();
  oSpeechSynthesisUtterance = oUtterance;

  // The option value is an index into oVoices; "" means no voice is selected.
  var sVoiceIndex = oVoices ? selVoices.value : "";
  if (sVoiceIndex != "") {
    oUtterance.voice = oVoices[parseInt(sVoiceIndex)];
  }

  oUtterance.onend = function () {
    // Finished speaking, so listening can resume.
    if (!oSpeechRecognizer || !chkSpeak.checked) {
      return;
    }
    oSpeechRecognizer.start();
  };

  if (oSpeechRecognizer && chkSpeak.checked) {
    // Stop listening while speaking so the page does not hear itself.
    oSpeechRecognizer.stop();
  }

  oUtterance.lang = selLang.value;
  oUtterance.text = s;

  // Console message recorded at this call by the original code:
  // "Uncaught (in promise) Error: A listener indicated an asynchronous
  // response by returning true, but the message channel closed"
  window.speechSynthesis.speak(oUtterance);
}
/**
 * Click handler for the mute checkbox: hides the voice drop-down while muted
 * and shows it again otherwise.
 *
 * @param {boolean} b - truthy to hide the voice list, falsy to show it.
 */
function Mute(b) {
  // An empty string restores the element's default display value.
  selVoices.style.display = b ? "none" : "";
}
/**
 * Click handler for the "listen" checkbox: starts or stops speech recognition.
 *
 * The first call creates the webkitSpeechRecognition instance (continuous,
 * with interim results, in the language selected in selLang) and starts it.
 * Later calls start or stop that same instance according to chkSpeak.
 *
 * While recognizing, interim transcripts are shown in the #idText element.
 * Each final transcript is placed in txtMsg and submitted through Send().
 */
function SpeechToText() {
  if (oSpeechRecognizer) {
    // Recognizer already exists: just follow the checkbox state.
    if (chkSpeak.checked) {
      oSpeechRecognizer.start();
    } else {
      oSpeechRecognizer.stop();
    }
    return;
  }

  oSpeechRecognizer = new webkitSpeechRecognition();
  oSpeechRecognizer.continuous = true;
  oSpeechRecognizer.interimResults = true;
  oSpeechRecognizer.lang = selLang.value;

  oSpeechRecognizer.onresult = function (event) {
    var interimTranscripts = "";
    for (var i = event.resultIndex; i < event.results.length; i++) {
      var transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        txtMsg.value = transcript;
        Send();
      } else {
        // Strip every line break. replace() returns a new string, so the
        // result has to be used rather than discarded.
        interimTranscripts += transcript.replace(/\n/g, "");
      }
    }
    // textContent shows the recognized words literally instead of parsing
    // them as markup. One update after the loop is enough.
    document.getElementById("idText").textContent = interimTranscripts;
  };

  oSpeechRecognizer.onerror = function (event) {
    // Report the problem instead of dropping it silently; recognition state
    // is left as it is.
    console.warn("Speech recognition error: " + event.error);
  };

  // Start only after the handlers are attached.
  oSpeechRecognizer.start();
}
以下是HTML页面的代码示例:
<!DOCTYPE html>
<html>
<head>
<!-- NOTE(review): no <meta charset> is declared here; confirm the page and
     ChatGPT.js are served as UTF-8 so the Chinese UI strings render correctly. -->
<title>ChatGPT</title>
<!-- Page logic: OnLoad, Send, SpeechToText, Mute and ChangeLang are referenced
     by the inline handlers below. The "?v=15" query string is presumably a
     cache-buster; verify. -->
<script src="ChatGPT.js?v=15"></script>
</head>
<!-- OnLoad() detects speech support and fills the voice list. -->
<body onload="OnLoad()">
<div id="idContainer">
<!-- Conversation transcript: Send() appends the question and the reply (or error) here. -->
<textarea id="txtOutput" rows="10" style="margin-top: 10px; width: 100%;" placeholder="输出"></textarea>
<div>
<!-- Submits the text in txtMsg. -->
<button type="button" onclick="Send()" id="btnSend">发送</button>
<!-- "Listen" toggle for speech recognition; OnLoad() hides this label when
     webkitSpeechRecognition is unavailable. -->
<label id="lblSpeak">
<input id="chkSpeak" type="checkbox" onclick="SpeechToText()" />
监听
</label>
<!-- "Mute" toggle: TextToSpeech() returns early while checked, and Mute() hides the voice list. -->
<label id="lblMute">
<input id="chkMute" type="checkbox" onclick="Mute(this.checked)" />
静音
</label>
<!-- Model sent with the request; Send() uses the chat completions endpoint for
     values containing "gpt-3.5-turbo" and the completions endpoint otherwise. -->
<select id="selModel">
<option value="text-davinci-003">text-davinci-003</option>
<option value="text-davinci-002">text-davinci-002</option>
<option value="code-davinci-002">code-davinci-002</option>
<option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
<option value="gpt-3.5-turbo-0301">gpt-3.5-turbo-0301</option>
</select>
<!-- Language code applied to both the speech recognizer and the spoken utterance. -->
<select id="selLang" onchange="ChangeLang(this)">
<option value="en-US">英语(美国)</option>
<option value="fr-FR">法语(法国)</option>
<option value="ru-RU">俄语(俄罗斯)</option>
<option value="pt-BR">葡萄牙语(巴西)</option>
<option value="es-ES">西班牙语(西班牙)</option>
<option value="de-DE">德语(德国)</option>
<option value="it-IT">意大利语(意大利)</option>
<option value="pl-PL">波兰语(波兰)</option>
<option value="nl-NL">荷兰语(荷兰)</option>
</select>
<!-- Voice list; options are added by OnLoad() from speechSynthesis.getVoices(). -->
<select id="selVoices">
</select>
<!-- Status line: Send() shows the "thinking" message here while a request is pending. -->
<span id="spMsg">
</span>
</div>
<!-- Question input; also receives final speech-recognition transcripts before they are sent. -->
<textarea id="txtMsg" rows="5" wrap="soft" style="width: 98%; margin-left: 3px; margin-top: 6px" placeholder="输入文本"></textarea>
<!-- Interim speech-recognition transcripts are displayed here. -->
<div id="idText">
</div>
</div>
</body>
</html>
并非所有浏览器都支持语音识别和文本转语音功能。Chrome和Edge似乎支持这些功能,而Firefox似乎只支持文本转语音。
2022年12月25日:创建版本1