// File listing metadata: 213 lines, 6.8 KiB, JavaScript
const { StatusCodes } = require("http-status-codes")
|
|
const { sentiment, convertTextToSpeech, listVoice, convertAudioToLinear16, getAudioDuration } = require("../utils")
|
|
const language = require('@google-cloud/language').v2
|
|
const CustomError = require('../errors')
|
|
const voiceConfigList = require('../mockData/voice.json')
|
|
const languageCodes = require('../mockData/languageCodes.json')
|
|
const path = require('path')
|
|
const fs = require('fs')
|
|
const speech = require('@google-cloud/speech')
|
|
const { speechToText, speechToTextJob } = require('../utils')
|
|
const client = new speech.SpeechClient()
|
|
const protobuf = require('protobufjs')
|
|
|
|
|
|
/**
 * Express handler: runs sentiment analysis on the `text` field of the request body.
 *
 * Responds with HTTP 200 and `{ status }`, where `status` is whatever the
 * `sentiment` helper resolves to for that text.
 *
 * @param {object} req - Express request; `req.body.text` is forwarded to `sentiment`.
 * @param {object} res - Express response.
 */
const getSentiment = async (req, res) => {
    const status = await sentiment(req.body.text)

    res.status(StatusCodes.OK).json({ status })
}
|
|
|
|
/**
 * Express handler: synthesizes speech for `req.query.text` and sends it back as MP3.
 *
 * Query parameters: `text`, plus the optional voice settings `voice_name`,
 * `voice_gender` and `languageCode`.
 *
 * When `languageCode` is `pt-BR`, all three voice settings are required and the
 * combination must match an entry of `voiceConfigList`.
 * NOTE(review): for any other language code the settings are forwarded to
 * `convertTextToSpeech` unvalidated — confirm this is intended.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response; receives the audio with an inline
 *   `Content-Disposition` header and the `audio/mpeg` content type.
 * @throws {CustomError.BadRequestError} On a missing voice setting or an unknown
 *   voice combination (pt-BR requests only).
 */
const getAudioFromText = async (req, res) => {
    const { text, voice_name, voice_gender, languageCode } = req.query

    if ((voice_name || voice_gender || languageCode) && languageCode == 'pt-BR') {
        const config = { voice_name, voice_gender, languageCode }

        // Once one voice setting is supplied, the other two become mandatory.
        for (const [key, value] of Object.entries(config)) {
            if (value === undefined) {
                throw new CustomError.BadRequestError(`The key ${key} is required when setted one of the three configuration parameters: voice_name, voice_gender and languageCode`)
            }
        }

        const voice = voiceConfigList.find((candidate) =>
            candidate.name == config.voice_name &&
            candidate.ssmlGender == config.voice_gender &&
            candidate.languageCode == config.languageCode
        )

        if (!voice) {
            throw new CustomError.BadRequestError(`Wrong config voice combination! Check the endpoint(http://localhost:6001/api/v1/nl/voice-config) to display the available configurations to a language`)
        }
    }

    const audioBuffer = await convertTextToSpeech(text, voice_name, voice_gender, languageCode)

    // Declared locally: the previous bare assignment created an implicit global
    // that concurrent requests could overwrite (and throws in strict mode).
    const filename = voice_name && voice_gender && languageCode
        ? `${voice_name}_${voice_gender}_${languageCode}.mp3`
        : `pt-BR-Standard-B_MALE_pt-BR.mp3`

    // `inline` (rather than `attachment`) is the disposition sent to the client.
    res.set("Content-Disposition", `inline; filename="${filename}"`)
    res.contentType('audio/mpeg')
    res.status(StatusCodes.OK).send(audioBuffer)
}
|
|
|
|
/**
 * Express handler: transcribes an uploaded audio file.
 *
 * Reads the upload from `req.file` and an optional `languageCode` from
 * `req.body`. The upload is converted with `convertAudioToLinear16` into
 * `../public/uploads/<name>.wav` and the converted file is passed to
 * `speechToText`.
 *
 * Responds 200 with `{ transcription }` when the helper result carries a
 * transcription; otherwise responds with the helper's `status` and `msg`
 * (500 when the helper returned nothing usable).
 *
 * @param {object} req - Express request with `file.path` set by the upload middleware.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When no file was uploaded or
 *   `languageCode` is not listed in `languageCodes`.
 */
const getTextFromAudio = async (req, res) => {
    const { languageCode } = req.body
    const audio = req.file

    if (!audio) {
        throw new CustomError.BadRequestError(`Missing the audio file`)
    }

    const inputFile = path.resolve(audio.path)
    // Every path registered here is removed in `finally`, so a failing
    // conversion or recognition call no longer leaves files behind.
    const tempFiles = [inputFile]
    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode == languageCode)) {
            throw new CustomError.BadRequestError(`Invalid language code`)
        }

        const fileName = path.basename(inputFile, path.extname(inputFile))
        const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)
        // Registered before converting: a failed conversion may leave a partial file.
        tempFiles.push(outputFile)

        const filePath = await convertAudioToLinear16(inputFile, outputFile)
        if (filePath) tempFiles.push(filePath)

        obj = await speechToText(filePath, languageCode)
    } finally {
        // `force` ignores paths that do not exist, so cleanup never masks the real error.
        for (const file of tempFiles) {
            fs.rmSync(file, { force: true })
        }
    }

    if (obj?.transcription) {
        return res.status(StatusCodes.OK).json({ transcription: obj.transcription })
    }

    // Fall back to a 500 instead of crashing when the helper returned nothing usable.
    res.status(obj?.status ?? StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: obj?.msg ?? 'Transcription failed' })
}
|
|
|
|
/**
 * Express handler: submits an uploaded audio file for asynchronous transcription.
 *
 * Reads the upload from `req.file` and an optional `languageCode` from
 * `req.body`. The upload is converted with `convertAudioToLinear16` into
 * `../public/uploads/<name>.wav` and the converted file is passed to
 * `speechToTextJob`.
 *
 * Responds 200 with `{ operationId }` when the helper result carries an
 * `operationName`; otherwise responds with the helper's `status` and `msg`
 * (500 when the helper returned nothing usable).
 *
 * @param {object} req - Express request with `file.path` set by the upload middleware.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When no file was uploaded or
 *   `languageCode` is not listed in `languageCodes`.
 */
const uploadAudioToTranscript = async (req, res) => {
    const { languageCode } = req.body
    const audio = req.file

    if (!audio) {
        throw new CustomError.BadRequestError(`Missing the audio file`)
    }

    const inputFile = path.resolve(audio.path)
    // Every path registered here is removed in `finally`, so a failing
    // conversion or job submission no longer leaves files behind.
    const tempFiles = [inputFile]
    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode == languageCode)) {
            throw new CustomError.BadRequestError(`Invalid language code`)
        }

        const fileName = path.basename(inputFile, path.extname(inputFile))
        const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)
        // Registered before converting: a failed conversion may leave a partial file.
        tempFiles.push(outputFile)

        const filePath = await convertAudioToLinear16(inputFile, outputFile)
        if (filePath) tempFiles.push(filePath)

        obj = await speechToTextJob(filePath, languageCode)
    } finally {
        // `force` ignores paths that do not exist, so cleanup never masks the real error.
        for (const file of tempFiles) {
            fs.rmSync(file, { force: true })
        }
    }

    if (obj?.operationName) {
        return res.status(StatusCodes.OK).json({ operationId: obj.operationName })
    }

    // Fall back to a 500 instead of crashing when the helper returned nothing usable.
    res.status(obj?.status ?? StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: obj?.msg ?? 'Transcription job could not be started' })
}
|
|
|
|
// Cached protobuf message type used to decode finished operations. Built on
// first use so the .proto files are read from disk once per process instead of
// synchronously on every request.
let longRunningRecognizeResponseType = null

/** Loads (once) and returns the `LongRunningRecognizeResponse` message type. */
const loadLongRunningRecognizeResponseType = () => {
    if (!longRunningRecognizeResponseType) {
        const protoDir = path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google')
        const root = new protobuf.Root()

        root.loadSync(path.join(protoDir, 'rpc', 'status.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'protobuf', 'duration.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'cloud', 'speech', 'v1', 'cloud_speech.proto'), { keepCase: true })

        longRunningRecognizeResponseType = root.lookupType('google.cloud.speech.v1.LongRunningRecognizeResponse')
    }

    return longRunningRecognizeResponseType
}

/**
 * Express handler: reports the state of a transcription job.
 *
 * Looks the operation up via `client.getOperation` using
 * `req.query.operationName` and responds with:
 *  - 404 `{ msg }` when no operation is returned,
 *  - 202 `{ msg }` while the operation is not done,
 *  - 500 `{ msg }` when the finished operation carries an error or no payload,
 *  - 200 `{ transcription }` with the first alternative of every result joined by spaces.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When `operationName` is missing.
 */
const getJobStatus = async (req, res) => {
    const { operationName } = req.query

    if (!operationName) {
        throw new CustomError.BadRequestError(`Missing operationName query parameter`)
    }

    // Get the operation using the operationName
    const [response] = await client.getOperation({ name: operationName })

    if (!response) {
        return res.status(StatusCodes.NOT_FOUND).json({ msg: "Operation not found" })
    }

    if (!response.done) {
        return res.status(StatusCodes.ACCEPTED).json({ msg: "Transcription in progress" })
    }

    // A failed operation reports `error` and has no `response` payload to decode;
    // without this check the decode below would crash on `response.response.value`.
    if (response.error) {
        console.error('Error:', response.error)
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: response.error })
    }

    const payload = response.response?.value

    if (!payload) {
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: "Operation finished without a response" })
    }

    // Decode the response value to get transcribed text
    const longRunningResponse = loadLongRunningRecognizeResponseType().decode(payload)

    // NOTE(review): kept from the original handler — confirm the decoded message
    // actually exposes an `error` field in the proto version in use.
    if (longRunningResponse.error) {
        console.error('Error:', longRunningResponse.error)
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: longRunningResponse.error })
    }

    // Results without any alternative are skipped instead of throwing.
    const transcriptions = (longRunningResponse.results ?? []).flatMap(result =>
        result.alternatives?.length ? [result.alternatives[0].transcript] : []
    )

    res.status(StatusCodes.OK).json({ transcription: transcriptions.join(' ') })
}
|
|
|
|
/**
 * Express handler: lists the voice configurations for a language.
 *
 * Forwards `req.query.languageCode` (logged to the console first) to
 * `listVoice` and responds with HTTP 200 and `{ configs }`.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 */
const getVoiceConfig = async (req, res) => {
    const requestedLanguage = req.query.languageCode

    console.log(requestedLanguage)

    const configs = await listVoice(requestedLanguage)

    res.status(StatusCodes.OK).json({ configs })
}
|
|
|
|
module.exports = {
|
|
getSentiment,
|
|
getAudioFromText,
|
|
getTextFromAudio,
|
|
getVoiceConfig,
|
|
getJobStatus,
|
|
uploadAudioToTranscript
|
|
} |