natural-language-api-google/utils/speechToText.js

68 lines
2.2 KiB
JavaScript
Raw Normal View History

// Imports the Google Cloud client library
const speech = require('@google-cloud/speech')
const { StatusCodes } = require("http-status-codes")
const path = require('path')
const fs = require('fs')
const getAudioDuration = require('./getAudioDuration')
const audioUploadToBucket = require('./audioUploadToBucket')
async function speechToText(filename, languageCode = 'pt-Br', bucket = 'speect-to-text-bucket', sampleRateHertz = 16000, encoding = 'LINEAR16') {
const client = new speech.SpeechClient()
let audio
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
}
const seconds = await getAudioDuration(filename)
if (seconds >= 28800) {
return { msg: 'Audio file is higher than 480 minute', status: StatusCodes.BAD_REQUEST }
}
else if (seconds <= 59) {
audio = {
content: fs.readFileSync(filename).toString('base64'),
}
} else if (seconds >= 60) {
const uploaded = await audioUploadToBucket(bucket, filename, path.basename(filename))
if (uploaded) {
audio = {
uri: `gs://${bucket}/${path.basename(filename)}`,
}
}
}
if (!audio) return { msg: `Error on try upload the file to google cloud bucket(${bucket}) storage`, status: StatusCodes.INTERNAL_SERVER_ERROR }
const request = {
config: config,
audio: audio,
}
try {
// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
const [operation] = await client.longRunningRecognize(request)
// Get a Promise representation of the final result of the job
const [response] = await operation.promise()
const transcription = response.results
.map(result => result.alternatives[0].transcript)
.join('\n')
console.log(`Transcription: ${transcription}`)
return { msg: `Transcript success`, status: StatusCodes.OK, transcription }
} catch (error) {
console.log('ERROR ON TRY TRANSCRIPT: ', error)
return { msg: `Error on try transcript the file`, status: StatusCodes.INTERNAL_SERVER_ERROR }
}
}
module.exports = speechToText