const { StatusCodes } = require("http-status-codes") const { sentiment, convertTextToSpeech, listVoice, convertAudioToLinear16, getAudioDuration } = require("../utils") const language = require('@google-cloud/language').v2 const CustomError = require('../errors') const voiceConfigList = require('../mockData/voice.json') const languageCodes = require('../mockData/languageCodes.json') const path = require('path') const fs = require('fs') const speech = require('@google-cloud/speech') const { speechToText, speechToTextJob, speechToTextOpenai } = require('../utils') const client = new speech.SpeechClient() const protobuf = require('protobufjs') const getSentiment = async (req, res) => { const { text, } = req.body const status = await sentiment(text) res.status(StatusCodes.OK).json({ status }) } const getAudioFromText = async (req, res) => { const { text, voice_name, voice_gender, languageCode } = req.query if ((voice_name || voice_gender || languageCode) && languageCode == 'pt-BR') { const config = { voice_name, voice_gender, languageCode } for (const key in config) { if (config.hasOwnProperty(key) && config[key] === undefined) { throw new CustomError.BadRequestError(`The key ${key} is required when setted one of the three configuration parameters: voice_name, voice_gender and languageCode`) } } const voice = voiceConfigList.find(({ name, ssmlGender, languageCode }) => { if (name == config.voice_name && ssmlGender == config.voice_gender && languageCode == config.languageCode) return { name, ssmlGender, languageCode } }) if (!voice) throw new CustomError.BadRequestError(`Wrong config voice combination! Check the endpoint(http://localhost:6001/api/v1/nl/voice-config) to display the available configurations to a language`) } const audioBuffer = await convertTextToSpeech(text, voice_name, voice_gender, languageCode) if (voice_name && voice_gender && languageCode) { filename = `${voice_name}_${voice_gender}_${languageCode}.mp3` } else { filename = `pt-BR-Standard-B_MALE_pt-BR.mp3` } // Set the Content-Disposition header // res.set("Content-Disposition", `attachment; filename="${filename}"`); res.set("Content-Disposition", `inline; filename="${filename}"`) res.contentType('audio/mpeg') res.status(StatusCodes.OK).send(audioBuffer) } const getTextFromAudio = async (req, res) => { const { languageCode } = req.body const audio = req.file if (!audio) throw new CustomError.BadRequestError(`Missing the audio file`) if (languageCode) { const existLanguageCode = languageCodes.find(l => l.languageCode == languageCode) if (!existLanguageCode) { fs.unlinkSync(audio.path) throw new CustomError.BadRequestError(`Invalid language code`) } } const inputFile = path.resolve(audio.path) const fileName = path.basename(inputFile, path.extname(inputFile)) const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`) const filePath = await convertAudioToLinear16(inputFile, outputFile) fs.unlinkSync(inputFile) const obj = await speechToText(filePath, languageCode) fs.unlinkSync(filePath) if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription }) res.status(obj.status).json({ msg: obj.msg }) } const uploadAudioToTranscript = async (req, res) => { const { languageCode } = req.body const audio = req.file if (!audio) throw new CustomError.BadRequestError(`Missing the audio file`) if (languageCode) { const existLanguageCode = languageCodes.find(l => l.languageCode == languageCode) if (!existLanguageCode) { fs.unlinkSync(audio.path) throw new CustomError.BadRequestError(`Invalid language code`) } } const inputFile = path.resolve(audio.path) const fileName = path.basename(inputFile, path.extname(inputFile)) const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`) const filePath = await convertAudioToLinear16(inputFile, outputFile) fs.unlinkSync(inputFile) const obj = await speechToTextJob(filePath, languageCode) fs.unlinkSync(filePath) if (obj?.operationName) return res.status(StatusCodes.OK).json({ operationId: obj.operationName }) res.status(obj.status).json({ msg: obj.msg }) } const getJobStatus = async (req, res) => { const { operationName } = req.query if (!operationName) throw new CustomError.BadRequestError(`Missing operationName query parameter`) // Get the operation using the operationName const [response] = await client.getOperation({ name: operationName }) if (!response) { return res.status(404).json({ msg: "Operation not found" }) } if (response.done) { // Load the protobuf message types const root = new protobuf.Root() root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'rpc', 'status.proto'), { keepCase: true }) root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'protobuf', 'duration.proto'), { keepCase: true }) root.loadSync(path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google', 'cloud', 'speech', 'v1', 'cloud_speech.proto'), { keepCase: true }) // Get the message type const LongRunningRecognizeResponse = root.lookupType('google.cloud.speech.v1.LongRunningRecognizeResponse') if (!response) { return res.status(StatusCodes.NOT_FOUND).json({ msg: "Operation not found" }) } // Decode the response value to get transcribed text const longRunningResponse = LongRunningRecognizeResponse.decode(response.response.value) if (longRunningResponse.error) { console.error('Error:', longRunningResponse.error) res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: longRunningResponse.error }) } else { const transcriptions = longRunningResponse.results.map(result => result.alternatives[0].transcript) const fullTranscription = transcriptions.join(' ') // console.log('Full Transcription:', fullTranscription) res.status(StatusCodes.OK).json({ transcription: fullTranscription }) } } else { res.status(StatusCodes.ACCEPTED).json({ msg: "Transcription in progress" }) } } const getVoiceConfig = async (req, res) => { const { languageCode } = req.query console.log(languageCode) const configs = await listVoice(languageCode) res.status(StatusCodes.OK).json({ configs }) } const getTextFromAudioOpenai = async (req, res) => { // const { languageCode } = req.body const audio = req.file if (!audio) throw new CustomError.BadRequestError(`Missing the audio file`) // if (languageCode) { // const existLanguageCode = languageCodes.find(l => l.languageCode == languageCode) // if (!existLanguageCode) { // fs.unlinkSync(audio.path) // throw new CustomError.BadRequestError(`Invalid language code`) // } // } const inputFile = path.resolve(audio.path) const fileName = path.basename(inputFile, path.extname(inputFile)) // const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`) // const filePath = await convertAudioToLinear16(inputFile, outputFile) // fs.unlinkSync(inputFile) const obj = await speechToTextOpenai(inputFile) // fs.unlinkSync(filePath) fs.unlinkSync(inputFile) if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription }) res.status(obj.status).json({ msg: obj.msg }) } module.exports = { getSentiment, getAudioFromText, getTextFromAudio, getVoiceConfig, getJobStatus, uploadAudioToTranscript, getTextFromAudioOpenai }