// natural-language-api-google/controllers/naturalLanguageController.js

const { StatusCodes } = require("http-status-codes")
const {
  sentiment,
  convertTextToSpeech,
  listVoice,
  convertAudioToLinear16,
  getAudioDuration,
  speechToText,
  speechToTextJob,
  speechToTextOpenai,
} = require("../utils")
const language = require('@google-cloud/language').v2
const CustomError = require('../errors')
const voiceConfigList = require('../mockData/voice.json')
const languageCodes = require('../mockData/languageCodes.json')
const path = require('path')
const fs = require('fs')
const speech = require('@google-cloud/speech')
const client = new speech.SpeechClient()
const protobuf = require('protobufjs')
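
// Note: the Google Cloud clients used here authenticate via Application
// Default Credentials, so GOOGLE_APPLICATION_CREDENTIALS should point at a
// service account key file unless the process runs with ambient GCP credentials.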
const getSentiment = async (req, res) => {
  const { text } = req.body
  const status = await sentiment(text)
  res.status(StatusCodes.OK).json({ status })
}
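
// A minimal usage sketch (hypothetical route path; only the /api/v1/nl prefix
// is confirmed by the voice-config error message below):
//
//   curl -X POST http://localhost:6001/api/v1/nl/sentiment \
//     -H 'Content-Type: application/json' \
//     -d '{ "text": "I love this product" }'
//   // → 200 { "status": ... }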
const getAudioFromText = async (req, res) => {
  const { text, voice_name, voice_gender, languageCode } = req.query

  if ((voice_name || voice_gender || languageCode) && languageCode == 'pt-BR') {
    const config = { voice_name, voice_gender, languageCode }
    for (const key in config) {
      if (config.hasOwnProperty(key) && config[key] === undefined) {
        throw new CustomError.BadRequestError(`The key ${key} is required when any of the three configuration parameters is set: voice_name, voice_gender and languageCode`)
      }
    }
    const voice = voiceConfigList.find(({ name, ssmlGender, languageCode }) =>
      name == config.voice_name && ssmlGender == config.voice_gender && languageCode == config.languageCode
    )
    if (!voice)
      throw new CustomError.BadRequestError(`Invalid voice config combination! Check the endpoint (http://localhost:6001/api/v1/nl/voice-config) to list the available configurations for a language`)
  }
  const audioBuffer = await convertTextToSpeech(text, voice_name, voice_gender, languageCode)

  // Name the file after the requested voice, falling back to the default voice
  const filename = (voice_name && voice_gender && languageCode)
    ? `${voice_name}_${voice_gender}_${languageCode}.mp3`
    : `pt-BR-Standard-B_MALE_pt-BR.mp3`

  // Set the Content-Disposition header (inline so the browser plays the audio
  // instead of downloading it)
  // res.set("Content-Disposition", `attachment; filename="${filename}"`);
  res.set("Content-Disposition", `inline; filename="${filename}"`)

  res.contentType('audio/mpeg')
  res.status(StatusCodes.OK).send(audioBuffer)
}
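
// Usage sketch for the handler above (hypothetical route path):
//
//   GET /api/v1/nl/audio-from-text?text=Ol%C3%A1%20mundo&voice_name=pt-BR-Standard-B&voice_gender=MALE&languageCode=pt-BR
//   // → 200, Content-Type: audio/mpeg, body is the synthesized MP3
//
// With no voice parameters, the filename fallback above implies the default
// voice is pt-BR-Standard-B / MALE / pt-BR.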
const getTextFromAudio = async (req, res) => {
  const { languageCode } = req.body
  const audio = req.file
  if (!audio)
    throw new CustomError.BadRequestError(`Missing the audio file`)
  if (languageCode) {
    const existLanguageCode = languageCodes.find(l => l.languageCode == languageCode)
    if (!existLanguageCode) {
      fs.unlinkSync(audio.path)
      throw new CustomError.BadRequestError(`Invalid language code`)
    }
  }
  // Convert the upload to a LINEAR16 WAV, the encoding the Speech-to-Text call expects
  const inputFile = path.resolve(audio.path)
  const fileName = path.basename(inputFile, path.extname(inputFile))
  const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)
  const filePath = await convertAudioToLinear16(inputFile, outputFile)
  fs.unlinkSync(inputFile)
  const obj = await speechToText(filePath, languageCode)
  fs.unlinkSync(filePath)
  if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription })
  res.status(obj.status).json({ msg: obj.msg })
}
const uploadAudioToTranscript = async (req, res) => {
  const { languageCode } = req.body
  const audio = req.file
  if (!audio)
    throw new CustomError.BadRequestError(`Missing the audio file`)
  if (languageCode) {
    const existLanguageCode = languageCodes.find(l => l.languageCode == languageCode)
    if (!existLanguageCode) {
      fs.unlinkSync(audio.path)
      throw new CustomError.BadRequestError(`Invalid language code`)
    }
  }
  const inputFile = path.resolve(audio.path)
  const fileName = path.basename(inputFile, path.extname(inputFile))
  const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)
  const filePath = await convertAudioToLinear16(inputFile, outputFile)
  fs.unlinkSync(inputFile)
  // Kick off a long-running recognize job instead of waiting for the result
  const obj = await speechToTextJob(filePath, languageCode)
  fs.unlinkSync(filePath)
  if (obj?.operationName) return res.status(StatusCodes.OK).json({ operationId: obj.operationName })
  res.status(obj.status).json({ msg: obj.msg })
}
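
// The handler above and getJobStatus below form an async pair (hypothetical
// paths): POST the audio, get back an operationId, then poll until done.
//
//   POST /api/v1/nl/upload-audio   (multipart/form-data, file field "audio")
//   // → 200 { "operationId": "..." }
//   GET  /api/v1/nl/job-status?operationName=<operationId>
//   // → 202 { "msg": "Transcription in progress" } while running,
//   //   200 { "transcription": "..." } when done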
const getJobStatus = async (req, res) => {
  const { operationName } = req.query
  if (!operationName)
    throw new CustomError.BadRequestError(`Missing operationName query parameter`)
  // Get the long-running operation using the operationName
  const [response] = await client.getOperation({ name: operationName })
  if (!response) {
    return res.status(StatusCodes.NOT_FOUND).json({ msg: "Operation not found" })
  }
  if (response.done) {
    // Load the protobuf message types needed to decode the packed response
    const root = new protobuf.Root()
    root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'rpc', 'status.proto'), { keepCase: true })
    root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'protobuf', 'duration.proto'), { keepCase: true })
    root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'cloud', 'speech', 'v1', 'cloud_speech.proto'), { keepCase: true })
    // Get the message type
    const LongRunningRecognizeResponse = root.lookupType('google.cloud.speech.v1.LongRunningRecognizeResponse')
    // Decode the response value to get the transcribed text
    const longRunningResponse = LongRunningRecognizeResponse.decode(response.response.value)
    if (longRunningResponse.error) {
      console.error('Error:', longRunningResponse.error)
      res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: longRunningResponse.error })
    } else {
      const transcriptions = longRunningResponse.results.map(result => result.alternatives[0].transcript)
      const fullTranscription = transcriptions.join(' ')
      res.status(StatusCodes.OK).json({ transcription: fullTranscription })
    }
  } else {
    res.status(StatusCodes.ACCEPTED).json({ msg: "Transcription in progress" })
  }
}
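
// A minimal client-side polling sketch for the handler above, assuming the
// hypothetical /api/v1/nl/job-status path from the previous comment:
//
//   const pollTranscription = async (operationName) => {
//     for (;;) {
//       const r = await fetch(`http://localhost:6001/api/v1/nl/job-status?operationName=${encodeURIComponent(operationName)}`)
//       if (r.status === 200) return (await r.json()).transcription
//       if (r.status !== 202) throw new Error((await r.json()).msg)
//       await new Promise(resolve => setTimeout(resolve, 5000)) // still running
//     }
//   }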
const getVoiceConfig = async (req, res) => {
  const { languageCode } = req.query
  const configs = await listVoice(languageCode)
  res.status(StatusCodes.OK).json({ configs })
}
const getTextFromAudioOpenai = async (req, res) => {
  const audio = req.file
  if (!audio)
    throw new CustomError.BadRequestError(`Missing the audio file`)
  // Unlike the Google flow above, no language-code validation or LINEAR16
  // conversion is done here: the upload is handed to the OpenAI transcriber as-is
  const inputFile = path.resolve(audio.path)
  const obj = await speechToTextOpenai(inputFile)
  fs.unlinkSync(inputFile)
  if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription })
  res.status(obj.status).json({ msg: obj.msg })
}
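
// Sketch of how these handlers might be wired up (hypothetical router file;
// assumes multer writes uploads where the handlers expect to find audio.path):
//
//   const router = require('express').Router()
//   const multer = require('multer')
//   const upload = multer({ dest: 'public/uploads/' })
//
//   router.post('/sentiment', getSentiment)
//   router.get('/audio-from-text', getAudioFromText)
//   router.post('/text-from-audio', upload.single('audio'), getTextFromAudio)
//   router.post('/upload-audio', upload.single('audio'), uploadAudioToTranscript)
//   router.get('/job-status', getJobStatus)
//   router.get('/voice-config', getVoiceConfig)  // path confirmed by the error message above
//   router.post('/text-from-audio-openai', upload.single('audio'), getTextFromAudioOpenai)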
module.exports = {
  getSentiment,
  getAudioFromText,
  getTextFromAudio,
  getVoiceConfig,
  getJobStatus,
  uploadAudioToTranscript,
  getTextFromAudioOpenai,
}