// natural-language-api-google/controllers/naturalLanguageController.js

const { StatusCodes } = require("http-status-codes")
const { sentiment, convertTextToSpeech, listVoice, convertAudioToLinear16, getAudioDuration } = require("../utils")
const language = require('@google-cloud/language').v2
const CustomError = require('../errors')
const voiceConfigList = require('../mockData/voice.json')
const languageCodes = require('../mockData/languageCodes.json')
const path = require('path')
const fs = require('fs')
const speech = require('@google-cloud/speech')
const { speechToText, speechToTextJob } = require('../utils')
const client = new speech.SpeechClient()
const protobuf = require('protobufjs')


/**
 * Passes `req.body.text` to the `sentiment` helper and replies with whatever
 * the helper resolves to, wrapped as `{ status }`.
 *
 * @param {object} req - Request object; only `req.body.text` is read.
 * @param {object} res - Response object; receives status `StatusCodes.OK` and a JSON body.
 * @returns {Promise<void>}
 */
const getSentiment = async (req, res) => {
    const body = req.body
    const analysis = await sentiment(body.text)
    const payload = { status: analysis }

    res.status(StatusCodes.OK).json(payload)
}

/**
 * Converts the `text` query parameter to speech and sends the audio back
 * inline as `audio/mpeg`.
 *
 * Query parameters read: `text`, `voice_name`, `voice_gender`, `languageCode`.
 * The voice parameters are validated only when `languageCode` is 'pt-BR': all
 * three must then be present and must match an entry of `voiceConfigList`.
 * Any other combination is forwarded to `convertTextToSpeech` unchecked.
 *
 * @param {object} req - Request object; only `req.query` is read.
 * @param {object} res - Response object; receives the headers and the audio buffer.
 * @returns {Promise<void>}
 * @throws {CustomError.BadRequestError} When a pt-BR request is missing a voice
 *   parameter or asks for a combination not present in `voiceConfigList`.
 */
const getAudioFromText = async (req, res) => {
    const { text, voice_name, voice_gender, languageCode } = req.query

    // `languageCode == 'pt-BR'` already implies that a voice parameter was sent,
    // so no separate "any parameter present" guard is needed. Loose equality is
    // kept so query values are compared exactly as before.
    if (languageCode == 'pt-BR') {
        const config = { voice_name, voice_gender, languageCode }

        // Report the first missing parameter, in declaration order.
        const missingKey = Object.keys(config).find(key => config[key] === undefined)

        if (missingKey !== undefined)
            throw new CustomError.BadRequestError(`The key ${missingKey} is required when setted one of the three configuration parameters: voice_name, voice_gender and languageCode`)

        const isKnownVoice = voiceConfigList.some(voice =>
            voice.name == voice_name &&
            voice.ssmlGender == voice_gender &&
            voice.languageCode == languageCode
        )

        if (!isKnownVoice)
            throw new CustomError.BadRequestError(`Wrong config voice combination! Check the endpoint(http://localhost:6001/api/v1/nl/voice-config) to display the available configurations to a language`)
    }

    const audioBuffer = await convertTextToSpeech(text, voice_name, voice_gender, languageCode)

    // Declared locally: the previous bare assignment created an implicit global
    // (and is a ReferenceError in strict mode / ES modules).
    // NOTE(review): the fallback name presumably mirrors the helper's default voice - confirm.
    const hasFullVoiceConfig = Boolean(voice_name && voice_gender && languageCode)
    const filename = hasFullVoiceConfig
        ? `${voice_name}_${voice_gender}_${languageCode}.mp3`
        : `pt-BR-Standard-B_MALE_pt-BR.mp3`

    // "inline" lets the client play the audio directly instead of forcing a download.
    res.set("Content-Disposition", `inline; filename="${filename}"`)

    res.contentType('audio/mpeg')

    res.status(StatusCodes.OK).send(audioBuffer)
}

/**
 * Transcribes the uploaded audio file (`req.file`) and responds with the text.
 *
 * Flow: validate the optional `languageCode` against `languageCodes`, convert
 * the upload to a `.wav` file under `public/uploads` through
 * `convertAudioToLinear16`, hand the converted file to `speechToText`, then
 * reply. Both the upload and the converted file are removed whether or not
 * the conversion / transcription succeeds.
 *
 * @param {object} req - Request object; reads `req.body.languageCode` and `req.file.path`.
 * @param {object} res - Response object.
 * @returns {Promise<*>}
 * @throws {CustomError.BadRequestError} When no file was uploaded or the
 *   language code is not listed in `languageCodes`.
 */
const getTextFromAudio = async (req, res) => {
    const { languageCode } = req.body
    const audio = req.file

    if (!audio)
        throw new CustomError.BadRequestError(`Missing the audio file`)

    const inputFile = path.resolve(audio.path)
    const fileName = path.basename(inputFile, path.extname(inputFile))
    const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)

    let filePath
    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode == languageCode))
            throw new CustomError.BadRequestError(`Invalid language code`)

        filePath = await convertAudioToLinear16(inputFile, outputFile)

        obj = await speechToText(filePath, languageCode)
    } finally {
        // Cleanup must also run when a helper rejects, otherwise every failed
        // request leaves files behind. `force` ignores already-missing paths.
        fs.rmSync(inputFile, { force: true })
        if (filePath) fs.rmSync(filePath, { force: true })
    }

    if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription })

    // NOTE(review): assumes the helper reports failures as { status, msg };
    // a null/undefined result would throw a TypeError here - confirm the helper contract.
    res.status(obj.status).json({ msg: obj.msg })
}

/**
 * Submits the uploaded audio file (`req.file`) for transcription through
 * `speechToTextJob` and responds with the resulting operation id.
 *
 * Flow: validate the optional `languageCode` against `languageCodes`, convert
 * the upload to a `.wav` file under `public/uploads` through
 * `convertAudioToLinear16`, pass the converted file to `speechToTextJob`, then
 * reply. Both the upload and the converted file are removed whether or not
 * the conversion / submission succeeds.
 *
 * @param {object} req - Request object; reads `req.body.languageCode` and `req.file.path`.
 * @param {object} res - Response object.
 * @returns {Promise<*>}
 * @throws {CustomError.BadRequestError} When no file was uploaded or the
 *   language code is not listed in `languageCodes`.
 */
const uploadAudioToTranscript = async (req, res) => {
    const { languageCode } = req.body
    const audio = req.file

    if (!audio)
        throw new CustomError.BadRequestError(`Missing the audio file`)

    const inputFile = path.resolve(audio.path)
    const fileName = path.basename(inputFile, path.extname(inputFile))
    const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}.wav`)

    let filePath
    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode == languageCode))
            throw new CustomError.BadRequestError(`Invalid language code`)

        filePath = await convertAudioToLinear16(inputFile, outputFile)

        obj = await speechToTextJob(filePath, languageCode)
    } finally {
        // Cleanup must also run when a helper rejects, otherwise every failed
        // request leaves files behind. `force` ignores already-missing paths.
        fs.rmSync(inputFile, { force: true })
        if (filePath) fs.rmSync(filePath, { force: true })
    }

    if (obj?.operationName) return res.status(StatusCodes.OK).json({ operationId: obj.operationName })

    // NOTE(review): assumes the helper reports failures as { status, msg };
    // a null/undefined result would throw a TypeError here - confirm the helper contract.
    res.status(obj.status).json({ msg: obj.msg })
}

// Decoder type for finished operations; filled on first use so the .proto
// files are read from disk once instead of on every status poll.
let cachedRecognizeResponseType = null

/**
 * Returns the protobuf type `google.cloud.speech.v1.LongRunningRecognizeResponse`,
 * loading the required .proto files from `node_modules/google-proto-files` on first call.
 */
const loadRecognizeResponseType = () => {
    if (!cachedRecognizeResponseType) {
        const protoDir = path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google')
        const root = new protobuf.Root()

        root.loadSync(path.join(protoDir, 'rpc', 'status.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'protobuf', 'duration.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'cloud', 'speech', 'v1', 'cloud_speech.proto'), { keepCase: true })

        cachedRecognizeResponseType = root.lookupType('google.cloud.speech.v1.LongRunningRecognizeResponse')
    }

    return cachedRecognizeResponseType
}

/**
 * Reports the state of a transcription operation identified by the
 * `operationName` query parameter.
 *
 * Responses:
 *  - NOT_FOUND             when the client returns no operation
 *  - ACCEPTED              while the operation is not `done`
 *  - INTERNAL_SERVER_ERROR when the finished operation (or its decoded payload) carries an error
 *  - OK                    with `{ transcription }`, the first alternative of every result joined by spaces
 *
 * @param {object} req - Request object; only `req.query.operationName` is read.
 * @param {object} res - Response object.
 * @returns {Promise<*>}
 * @throws {CustomError.BadRequestError} When `operationName` is missing.
 */
const getJobStatus = async (req, res) => {
    const { operationName } = req.query

    if (!operationName)
        throw new CustomError.BadRequestError(`Missing operationName query parameter`)

    // Get the operation using the operationName
    const [response] = await client.getOperation({ name: operationName })

    if (!response)
        return res.status(StatusCodes.NOT_FOUND).json({ msg: "Operation not found" })

    if (!response.done)
        return res.status(StatusCodes.ACCEPTED).json({ msg: "Transcription in progress" })

    // A finished operation holds either `error` or `response`. Decoding
    // `response.response.value` for a failed operation would throw, so the
    // operation-level error has to be handled first.
    if (response.error) {
        console.error('Error:', response.error)
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: response.error })
    }

    // Decode the response value to get transcribed text
    const longRunningResponse = loadRecognizeResponseType().decode(response.response.value)

    if (longRunningResponse.error) {
        console.error('Error:', longRunningResponse.error)
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: longRunningResponse.error })
    }

    const fullTranscription = longRunningResponse.results
        .map(result => result.alternatives[0].transcript)
        .join(' ')

    res.status(StatusCodes.OK).json({ transcription: fullTranscription })
}

/**
 * Looks up the voice configurations for the `languageCode` query parameter
 * through `listVoice` and replies with them as `{ configs }`.
 *
 * @param {object} req - Request object; only `req.query.languageCode` is read.
 * @param {object} res - Response object; receives status `StatusCodes.OK` and a JSON body.
 * @returns {Promise<void>}
 */
const getVoiceConfig = async (req, res) => {
    const requestedLanguage = req.query.languageCode

    // The requested language code is logged on every call.
    console.log(requestedLanguage)

    const voices = await listVoice(requestedLanguage)
    const payload = { configs: voices }

    res.status(StatusCodes.OK).json(payload)
}

// Request handlers exposed by this controller module.
module.exports = {
    getSentiment,
    getAudioFromText,
    getTextFromAudio,
    getVoiceConfig,
    getJobStatus,
    uploadAudioToTranscript
}