Unity 使用百度语音进行语音识别

发表于2018-02-11
评论0 5.1k浏览
如何在开发过程中使用百度语音进行语音识别?只需要接入百度语音的API接口即可。

新建脚本 ToWord.cs(文件名需与类名一致),将下列代码复制进去。注意:代码依赖 LitJson 插件;其中引用的 MicroPhoneManager、BtnInfo 和 Utils 在本文中没有给出,需要自行实现或替换:
using LitJson;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Records audio from the default microphone, converts it to 16-bit PCM,
/// optionally saves it as a WAV file, and posts it to the Baidu speech REST
/// endpoint. The recognised text is stored in <see cref="audioToString"/> and
/// passed to the caller-supplied callback.
/// </summary>
public class ToWord : MonoBehaviour {
    // Recording length (seconds) used by the OnGUI test buttons, and the
    // fallback value stored in audioLength when the microphone already stopped.
    private const int DefaultRecordSeconds = 10;
    // Number of bytes reserved at the start of the file for the WAV header.
    private const int WavHeaderSize = 44;
    // Bytes per 16-bit PCM sample.
    private const int BytesPerSample = 2;
    // Scale factor used to convert a float sample to Int16.
    private const int PcmScale = 32767;

    private string token= "";                           // access_token returned by the token endpoint
    private string cuid = "11";        // user identifier sent with each request
    // NOTE(review): the uploaded payload is raw PCM without a WAV header while
    // the declared format is "wav" - confirm against the service documentation.
    private string format = "wav";                  // audio format declared in the request
    private int rate = 8000;                        // sample rate (Hz) used for recording and declared in the request
    private int channel = 1;                        // channel count declared in the request
    private string speech;                          // audio data, base64 encoded
    private int len;                                // length in bytes of the raw (un-encoded) audio
    private string lan = "zh";                      // language (currently not sent with the request)
    private string grant_Type = "client_credentials";
    // NOTE(review): credentials are embedded in source; consider loading them from configuration.
    private string client_ID = "这里输入百度的appkey,自己到官网申请填入这里";  // Baidu app key
    private string client_Secret = "这里输入百度secretkey,自己到官网申请填写";  // Baidu secret key
    // NOTE(review): this endpoint is plain http, so the token and audio travel unencrypted.
    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";
    private byte[] clipByte;
    /// <summary>
    /// Text produced by the most recent recognition request ("" on failure).
    /// </summary>
    public static string audioToString;
    public  AudioSource aud;
    private int audioLength;// recording length in seconds
    public delegate void CallBack(string name);
    public delegate string ds();
    private static ToWord _toWord;
    public MicroPhoneManager m;
    public Text debugText;

    private void Awake()
    {
        _toWord = this;
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Returns the instance registered in <c>Awake</c> (null before any instance has awoken).
    /// </summary>
    public static ToWord GetInstance() {
        return _toWord;
    }

    /// <summary>
    /// Requests an access token and stores it in <c>token</c>.
    /// </summary>
    /// <param name="url">Token endpoint URL.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);
        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.Log("error:" + getTW.error);
            yield break;
        }

        // A malformed body or one without "access_token" must not throw
        // inside the coroutine; report it and leave the token empty instead.
        JsonData response = ParseJsonObject(getTW.text);
        if (response == null || !HasKey(response, "access_token") || response["access_token"] == null)
        {
            Debug.Log("error: access_token missing in response: " + getTW.text);
            yield break;
        }

        token = response["access_token"].ToString();
        Debug.Log("获取百度用户令牌 初始化完成");
    }

    /// <summary>
    /// Starts recording from the default microphone into <c>aud.clip</c>.
    /// </summary>
    /// <param name="durationTime">Length of the recording buffer in seconds.</param>
    public void StartMic(int durationTime)
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.Log("error: no microphone device found");
            return;
        }
        if (aud == null)
        {
            Debug.Log("error: AudioSource 'aud' is not assigned");
            return;
        }
        Microphone.End(null);
        Debug.Log("Start");
        aud.clip = Microphone.Start(null, false, durationTime, rate);
    }

    /// <summary>
    /// Stops recording, optionally saves the clip as a WAV file, and starts
    /// the recognition request.
    /// </summary>
    /// <param name="cb">Invoked with the recognised text ("" on failure); may be null.</param>
    /// <param name="info">Supplies the ID used in the WAV file name; when null the file is not written.</param>
    public void EndMic(CallBack cb,BtnInfo info)
    {
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
            audioLength = lastPos / rate;// recording length in seconds
        else
            audioLength = DefaultRecordSeconds;
        Debug.Log("录音结束");
        Microphone.End(null);

        clipByte = GetClipData();
        if (clipByte == null)
        {
            // Nothing was recorded: report an empty result instead of
            // dereferencing the missing buffer.
            audioToString = "";
            if (cb != null)
            {
                cb(audioToString);
            }
            return;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);

        // The cast keeps this check valid whether BtnInfo is a class or a struct.
        if ((object)info != null)
        {
            try
            {
                using (FileStream fs = CreateEmpty(Utils.GetAudioDataPath() + "/" + info.ID + "_1.wav"))
                {
                    ConvertAndWrite(fs, aud.clip);
                    WriteHeader(fs, aud.clip);
                    Debug.Log("保存成功");
                }
            }
            catch (IOException ex)
            {
                // Saving the file is secondary; recognition still proceeds.
                Debug.Log("error: failed to save recording: " + ex.Message);
            }
            catch (UnauthorizedAccessException ex)
            {
                Debug.Log("error: failed to save recording: " + ex.Message);
            }
        }

        StartCoroutine(GetAudioString(baiduAPI, cb));
    }

    // Unused placeholder matching the CallBack signature.
    void aaa(string str) { }

    /// <summary>
    /// Writes the 44-byte PCM WAV header at the start of <paramref name="stream"/>.
    /// Must be called after the sample data has been written, because the RIFF
    /// chunk size is derived from the current stream length.
    /// </summary>
    private void WriteHeader(FileStream stream, AudioClip clip)
    {
        int hz = clip.frequency;
        int channels = clip.channels;
        int samples = clip.samples;

        stream.Seek(0, SeekOrigin.Begin);
        WriteTag(stream, "RIFF");
        WriteInt32LE(stream, (int)(stream.Length - 8));          // RIFF chunk size
        WriteTag(stream, "WAVE");
        WriteTag(stream, "fmt ");
        WriteInt32LE(stream, 16);                                // fmt chunk size
        WriteInt16LE(stream, 1);                                 // audio format value 1
        WriteInt16LE(stream, channels);                          // number of channels
        WriteInt32LE(stream, hz);                                // sample rate
        WriteInt32LE(stream, hz * channels * BytesPerSample);    // byte rate
        WriteInt16LE(stream, channels * BytesPerSample);         // block align
        WriteInt16LE(stream, 16);                                // bits per sample
        WriteTag(stream, "data");
        WriteInt32LE(stream, samples * channels * BytesPerSample); // data chunk size
    }

    // Writes a four-character chunk tag.
    private static void WriteTag(Stream stream, string tag)
    {
        byte[] bytes = Encoding.UTF8.GetBytes(tag);
        stream.Write(bytes, 0, 4);
    }

    // Writes the low 32 bits of value in little-endian order, independent of platform endianness.
    private static void WriteInt32LE(Stream stream, int value)
    {
        stream.WriteByte((byte)(value & 0xFF));
        stream.WriteByte((byte)((value >> 8) & 0xFF));
        stream.WriteByte((byte)((value >> 16) & 0xFF));
        stream.WriteByte((byte)((value >> 24) & 0xFF));
    }

    // Writes the low 16 bits of value in little-endian order.
    private static void WriteInt16LE(Stream stream, int value)
    {
        stream.WriteByte((byte)(value & 0xFF));
        stream.WriteByte((byte)((value >> 8) & 0xFF));
    }

    /// <summary>
    /// Creates (or overwrites) the file and reserves space for the WAV header.
    /// The caller owns the returned stream and must dispose it.
    /// </summary>
    private FileStream CreateEmpty(string filepath)
    {
        FileStream fileStream = new FileStream(filepath, FileMode.Create);
        try
        {
            // Zero-filled placeholder; WriteHeader overwrites it later.
            fileStream.Write(new byte[WavHeaderSize], 0, WavHeaderSize);
        }
        catch
        {
            // Do not leak the handle when the placeholder cannot be written.
            fileStream.Dispose();
            throw;
        }
        return fileStream;
    }

    /// <summary>
    /// Appends the clip's samples to <paramref name="fileStream"/> as 16-bit PCM.
    /// </summary>
    private void ConvertAndWrite(FileStream fileStream, AudioClip clip)
    {
        byte[] bytesData = ToPcm16(clip);
        fileStream.Write(bytesData, 0, bytesData.Length);
    }

    /// <summary>
    /// Converts every sample of every channel of <paramref name="clip"/> to
    /// little-endian 16-bit PCM. Returns an empty array for an empty clip.
    /// </summary>
    private static byte[] ToPcm16(AudioClip clip)
    {
        // clip.samples counts samples per channel, so the buffer is sized by
        // samples * channels; this matches the data size WriteHeader declares.
        int total = clip.samples * clip.channels;
        if (total <= 0)
        {
            return new byte[0];
        }

        float[] samples = new float[total];
        clip.GetData(samples, 0);

        byte[] pcm = new byte[total * BytesPerSample];
        for (int i = 0; i < total; i++)
        {
            // Clamp first so an out-of-range float cannot wrap around when cast to short.
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * PcmScale);
            pcm[i * 2] = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Sends the recorded audio for recognition and reports the result.
    /// </summary>
    /// <param name="url">Recognition endpoint URL.</param>
    /// <param name="cb">Invoked with the recognised text ("" on failure); may be null.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetAudioString(string url, CallBack cb)
    {
        // Collected locally so a failed request never reports text left over
        // from an earlier recognition.
        string recognized = "";

        if (string.IsNullOrEmpty(token))
        {
            // The token request started in Awake has not completed (or failed).
            Debug.Log("error: access token not available yet");
        }
        else
        {
            JsonWriter jw = new JsonWriter();
            jw.WriteObjectStart();
            jw.WritePropertyName("format");
            jw.Write(format);
            jw.WritePropertyName("rate");
            jw.Write(rate);
            jw.WritePropertyName("channel");
            jw.Write(channel);
            jw.WritePropertyName("token");
            jw.Write(token);
            jw.WritePropertyName("cuid");
            jw.Write(cuid);
            jw.WritePropertyName("len");
            jw.Write(len);
            jw.WritePropertyName("speech");
            jw.Write(speech);
            jw.WriteObjectEnd();

            // UTF-8 rather than the platform-dependent default code page.
            // NOTE(review): no Content-Type header is set on this request -
            // confirm whether the service requires application/json.
            WWW getASW = new WWW(url, Encoding.UTF8.GetBytes(jw.ToString()));
            yield return getASW;

            if (string.IsNullOrEmpty(getASW.error))
            {
                recognized = ParseRecognitionResult(getASW.text);
            }
            else
            {
                Debug.Log("error:" + getASW.error);
            }
        }

        audioToString = recognized;
        Debug.Log("此次语音文字为:" + audioToString);
        if (cb != null)
        {
            cb(audioToString);
        }
    }

    /// <summary>
    /// Extracts the first recognition result from the response body.
    /// Returns "" when the body is malformed, reports a failure, or has no result.
    /// </summary>
    private static string ParseRecognitionResult(string json)
    {
        JsonData data = ParseJsonObject(json);
        if (data == null)
        {
            return "";
        }

        if (!HasKey(data, "err_msg") || data["err_msg"] == null
            || data["err_msg"].ToString() != "success.")
        {
            Debug.Log("error: recognition failed: " + json);
            return "";
        }

        if (!HasKey(data, "result"))
        {
            return "";
        }
        JsonData result = data["result"];
        if (result == null || !result.IsArray || result.Count == 0 || result[0] == null)
        {
            return "";
        }

        string text = result[0].ToString();
        // Drop a single trailing comma (ASCII or full-width); EndsWith is safe on "".
        if (text.EndsWith(",", StringComparison.Ordinal)
            || text.EndsWith("\uFF0C", StringComparison.Ordinal))
        {
            text = text.Substring(0, text.Length - 1);
        }
        return text;
    }

    // Parses json and returns it only when it is a JSON object; null otherwise.
    private static JsonData ParseJsonObject(string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return null;
        }
        try
        {
            JsonData data = JsonMapper.ToObject(json);
            return (data != null && data.IsObject) ? data : null;
        }
        catch (JsonException ex)
        {
            Debug.Log("error: invalid JSON response: " + ex.Message);
            return null;
        }
    }

    // True when the JSON object has the given key. Callers pass objects only.
    private static bool HasKey(JsonData data, string key)
    {
        return ((IDictionary)data).Contains(key);
    }

    /// <summary>
    /// Converts the recorded clip to 16-bit PCM bytes.
    /// </summary>
    /// <returns>The PCM bytes, or null when there is no recorded data.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.Log("录音数据为空");
            return null;
        }

        byte[] outData = ToPcm16(aud.clip);
        if (outData.Length <= 0)
        {
            Debug.Log("录音数据为空");
            return null;
        }
        return outData;
    }

    // Test buttons for starting and stopping a recording.
    private void OnGUI()
    {
        if (GUILayout.Button("Start"))
            StartMic(DefaultRecordSeconds);
        if (GUILayout.Button("End"))
            EndMic(null, default(BtnInfo)); // no callback; default info is passed so EndMic skips saving when it is null
    }

    private void Update()
    {
        // debugText is optional; skip the update when it is not assigned.
        if (debugText != null)
        {
            debugText.text = audioToString;
        }
    }
}

如社区发表内容存在侵权行为,您可以点击这里查看侵权投诉指引