feat(audio-conversion): add controller for Google Speech-to-Text API
							parent
							
								
									0ae216e9bc
								
							
						
					
					
						commit
						5673c86505
					
				
							
								
								
									
										11
									
								
								app.js
								
								
								
								
							
							
						
						
									
										11
									
								
								app.js
								
								
								
								
							|  | @ -10,22 +10,19 @@ const morgan = require('morgan') | |||
| // const fileUpload = require('express-fileupload')
 | ||||
| 
 | ||||
| const rateLimiter = require('express-rate-limit') | ||||
| const helmet = require('helmet') | ||||
| const xss = require('xss-clean') | ||||
| const cors = require('cors') | ||||
| 
 | ||||
| // Swagger
 | ||||
| const swaggerUI = require('swagger-ui-express') | ||||
| const YAML = require('yamljs') | ||||
| const swaggerDocument = YAML.load('./swagger.yaml')  | ||||
| 
 | ||||
| // database
 | ||||
| const connectDB = require('./db/connect') | ||||
| const helmet = require('helmet') | ||||
| const xss = require('xss-clean') | ||||
| const cors = require('cors')   | ||||
|   | ||||
| // routers  
 | ||||
| const nlRouter = require('./routes/naturalLanguageRoute')  | ||||
| 
 | ||||
| 
 | ||||
| const notFoundMiddlware = require('./middleware/not-found') | ||||
| const errorHandlerMiddleware = require('./middleware/error-handler') | ||||
| 
 | ||||
|  | @ -44,10 +41,8 @@ app.use(xss()) | |||
| app.use(morgan('tiny')) | ||||
| app.use(express.json()) | ||||
|   | ||||
| // app.use(express.static('./public'))
 | ||||
| // app.use(fileUpload())
 | ||||
| 
 | ||||
| 
 | ||||
| app.get('/', (req, res) => { | ||||
|     res.send('<h1>Sentiment API</h1><a href="/api-docs">Documentation</a>') | ||||
| })  | ||||
|  |  | |||
|  | @ -1,8 +1,16 @@ | |||
| const { StatusCodes } = require("http-status-codes") | ||||
| const { sentiment, convertTextToSpeech, listVoice } = require("../utils") | ||||
| const { sentiment, convertTextToSpeech, listVoice, convertAudioToLinear16, getAudioDuration } = require("../utils") | ||||
| const language = require('@google-cloud/language').v2 | ||||
| const CustomError = require('../errors') | ||||
| const voiceConfigList = require('../mockData/voice.json') | ||||
| const languageCodes = require('../mockData/languageCodes.json') | ||||
| const path = require('path') | ||||
| const fs = require('fs') | ||||
| const speech = require('@google-cloud/speech') | ||||
| const { speechToText, speechToTextJob } = require('../utils') | ||||
| const client = new speech.SpeechClient() | ||||
| const protobuf = require('protobufjs') | ||||
| 
 | ||||
| 
 | ||||
| const getSentiment = async (req, res) => { | ||||
| 
 | ||||
|  | @ -39,18 +47,16 @@ const getAudioFromText = async (req, res) => { | |||
|     const audioBuffer = await convertTextToSpeech(text, voice_name, voice_gender, languageCode) | ||||
| 
 | ||||
| 
 | ||||
|     if (voice_name && voice_gender && languageCode){ | ||||
|     if (voice_name && voice_gender && languageCode) { | ||||
|         filename = `${voice_name}_${voice_gender}_${languageCode}.mp3` | ||||
|     } | ||||
|     else{ | ||||
|     else { | ||||
|         filename = `pt-BR-Standard-B_MALE_pt-BR.mp3` | ||||
|     } | ||||
| 
 | ||||
|     // Set the Content-Disposition header 
 | ||||
|     // res.set("Content-Disposition", `attachment; filename="${filename}"`); 
 | ||||
|     res.set("Content-Disposition", `inline; filename="${filename}"`); | ||||
| 
 | ||||
| 
 | ||||
|     res.set("Content-Disposition", `inline; filename="${filename}"`) | ||||
| 
 | ||||
|     res.contentType('audio/mpeg') | ||||
| 
 | ||||
|  | @ -58,6 +64,134 @@ const getAudioFromText = async (req, res) => { | |||
| 
 | ||||
| } | ||||
| 
 | ||||
/**
 * Transcribes an uploaded audio file and answers with the transcription.
 *
 * Reads the stored upload from `req.file` and an optional `languageCode` from
 * `req.body`, converts the upload to a LINEAR16 WAV file, hands it to
 * `speechToText`, and always removes both temporary files, whether the
 * request succeeds or fails.
 *
 * @param {object} req - Express request; `req.file.path` is the stored upload.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When the file is missing or the
 *   language code is not listed in `languageCodes`.
 */
const getTextFromAudio = async (req, res) => {

    const { languageCode } = req.body

    const audio = req.file

    if (!audio)
        throw new CustomError.BadRequestError(`Missing the audio file`)

    const inputFile = path.resolve(audio.path)

    const fileName = path.basename(inputFile, path.extname(inputFile))

    // The suffix keeps the converted file distinct from the upload: a `.wav`
    // upload stored in the same uploads directory would otherwise be both the
    // input and the output of the conversion.
    const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}-linear16.wav`)

    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode === languageCode))
            throw new CustomError.BadRequestError(`Invalid language code`)

        const filePath = await convertAudioToLinear16(inputFile, outputFile)

        obj = await speechToText(filePath, languageCode)
    } finally {
        // `force` ignores files that were never created (e.g. a failed conversion).
        fs.rmSync(inputFile, { force: true })
        fs.rmSync(outputFile, { force: true })
    }

    if (obj?.transcription) return res.status(StatusCodes.OK).json({ transcription: obj.transcription })

    res.status(obj.status).json({ msg: obj.msg })

}
| 
 | ||||
/**
 * Starts an asynchronous transcription job for an uploaded audio file and
 * answers with the id of the operation that can be polled later.
 *
 * Reads the stored upload from `req.file` and an optional `languageCode` from
 * `req.body`, converts the upload to a LINEAR16 WAV file, hands it to
 * `speechToTextJob`, and always removes both temporary files, whether the
 * request succeeds or fails.
 *
 * @param {object} req - Express request; `req.file.path` is the stored upload.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When the file is missing or the
 *   language code is not listed in `languageCodes`.
 */
const uploadAudioToTranscript = async (req, res) => {

    const { languageCode } = req.body

    const audio = req.file

    if (!audio)
        throw new CustomError.BadRequestError(`Missing the audio file`)

    const inputFile = path.resolve(audio.path)

    const fileName = path.basename(inputFile, path.extname(inputFile))

    // The suffix keeps the converted file distinct from the upload: a `.wav`
    // upload stored in the same uploads directory would otherwise be both the
    // input and the output of the conversion.
    const outputFile = path.join(__dirname, '..', 'public', 'uploads', `${fileName}-linear16.wav`)

    let obj

    try {
        if (languageCode && !languageCodes.some(l => l.languageCode === languageCode))
            throw new CustomError.BadRequestError(`Invalid language code`)

        const filePath = await convertAudioToLinear16(inputFile, outputFile)

        obj = await speechToTextJob(filePath, languageCode)
    } finally {
        // `force` ignores files that were never created (e.g. a failed conversion).
        fs.rmSync(inputFile, { force: true })
        fs.rmSync(outputFile, { force: true })
    }

    if (obj?.operationName) return res.status(StatusCodes.OK).json({ operationId: obj.operationName })

    res.status(obj.status).json({ msg: obj.msg })
}
| 
 | ||||
// Decoder for finished recognition operations. Parsing the .proto files is
// synchronous disk work, so the type is built on first use and then reused.
let cachedRecognizeResponseType

// Loads (once) and returns the protobuf type used to decode an operation result.
const loadRecognizeResponseType = () => {
    if (!cachedRecognizeResponseType) {
        const protoDir = path.join(__dirname, '..', 'node_modules', 'google-proto-files', 'google')
        const root = new protobuf.Root()
        root.loadSync(path.join(protoDir, 'rpc', 'status.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'protobuf', 'duration.proto'), { keepCase: true })
        root.loadSync(path.join(protoDir, 'cloud', 'speech', 'v1', 'cloud_speech.proto'), { keepCase: true })
        cachedRecognizeResponseType = root.lookupType('google.cloud.speech.v1.LongRunningRecognizeResponse')
    }
    return cachedRecognizeResponseType
}

/**
 * Reports the state of a transcription job started earlier.
 *
 * Responds with 202 while the operation is still running, 200 with the joined
 * transcription once it finished successfully, 500 when the operation
 * finished with an error (or without a result), and 404 when no operation is
 * returned for the given name.
 *
 * @param {object} req - Express request; `req.query.operationName` identifies the job.
 * @param {object} res - Express response.
 * @throws {CustomError.BadRequestError} When `operationName` is missing.
 */
const getJobStatus = async (req, res) => {

    const { operationName } = req.query

    if (!operationName)
        throw new CustomError.BadRequestError(`Missing operationName query parameter`)

    // Get the operation using the operationName
    const [response] = await client.getOperation({ name: operationName })

    if (!response) {
        return res.status(StatusCodes.NOT_FOUND).json({ msg: "Operation not found" })
    }

    if (!response.done) {
        return res.status(StatusCodes.ACCEPTED).json({ msg: "Transcription in progress" })
    }

    // A finished operation carries either `error` or `response`; a failed one
    // has no payload to decode, so it must be handled before decoding.
    if (response.error) {
        console.error('Error:', response.error)
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: response.error.message || 'Transcription failed' })
    }

    const payload = response.response?.value

    if (!payload) {
        return res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ msg: 'Operation finished without a result' })
    }

    // Decode the response value to get transcribed text
    const longRunningResponse = loadRecognizeResponseType().decode(payload)

    // Results without any alternative are skipped instead of crashing the request.
    const fullTranscription = (longRunningResponse.results || [])
        .map(result => result.alternatives?.[0]?.transcript)
        .filter(Boolean)
        .join(' ')

    res.status(StatusCodes.OK).json({ transcription: fullTranscription })

}
| 
 | ||||
| const getVoiceConfig = async (req, res) => { | ||||
| 
 | ||||
|     const { languageCode } = req.query | ||||
|  | @ -72,5 +206,8 @@ const getVoiceConfig = async (req, res) => { | |||
| module.exports = { | ||||
|     getSentiment, | ||||
|     getAudioFromText, | ||||
|     getVoiceConfig | ||||
|     getTextFromAudio, | ||||
|     getVoiceConfig, | ||||
|     getJobStatus, | ||||
|     uploadAudioToTranscript | ||||
| } | ||||
|  | @ -0,0 +1,59 @@ | |||
| [ | ||||
|   { "languageCode": "af-ZA" }, | ||||
|   { "languageCode": "ar-XA" }, | ||||
|   { "languageCode": "bg-BG" }, | ||||
|   { "languageCode": "bn-IN" }, | ||||
|   { "languageCode": "ca-ES" }, | ||||
|   { "languageCode": "cmn-CN" }, | ||||
|   { "languageCode": "cmn-TW" }, | ||||
|   { "languageCode": "cs-CZ" }, | ||||
|   { "languageCode": "da-DK" }, | ||||
|   { "languageCode": "de-DE" }, | ||||
|   { "languageCode": "el-GR" }, | ||||
|   { "languageCode": "en-AU" }, | ||||
|   { "languageCode": "en-GB" }, | ||||
|   { "languageCode": "en-IN" }, | ||||
|   { "languageCode": "en-US" }, | ||||
|   { "languageCode": "es-ES" }, | ||||
|   { "languageCode": "es-US" }, | ||||
|   { "languageCode": "eu-ES" }, | ||||
|   { "languageCode": "fi-FI" }, | ||||
|   { "languageCode": "fil-PH" }, | ||||
|   { "languageCode": "fr-CA" }, | ||||
|   { "languageCode": "fr-FR" }, | ||||
|   { "languageCode": "gl-ES" }, | ||||
|   { "languageCode": "gu-IN" }, | ||||
|   { "languageCode": "he-IL" }, | ||||
|   { "languageCode": "hi-IN" }, | ||||
|   { "languageCode": "hu-HU" }, | ||||
|   { "languageCode": "id-ID" }, | ||||
|   { "languageCode": "is-IS" }, | ||||
|   { "languageCode": "it-IT" }, | ||||
|   { "languageCode": "ja-JP" }, | ||||
|   { "languageCode": "kn-IN" }, | ||||
|   { "languageCode": "ko-KR" }, | ||||
|   { "languageCode": "lt-LT" }, | ||||
|   { "languageCode": "lv-LV" }, | ||||
|   { "languageCode": "ml-IN" }, | ||||
|   { "languageCode": "mr-IN" }, | ||||
|   { "languageCode": "ms-MY" }, | ||||
|   { "languageCode": "nb-NO" }, | ||||
|   { "languageCode": "nl-BE" }, | ||||
|   { "languageCode": "nl-NL" }, | ||||
|   { "languageCode": "pa-IN" }, | ||||
|   { "languageCode": "pl-PL" }, | ||||
|   { "languageCode": "pt-BR" }, | ||||
|   { "languageCode": "pt-PT" }, | ||||
|   { "languageCode": "ro-RO" }, | ||||
|   { "languageCode": "ru-RU" }, | ||||
|   { "languageCode": "sk-SK" }, | ||||
|   { "languageCode": "sr-RS" }, | ||||
|   { "languageCode": "sv-SE" }, | ||||
|   { "languageCode": "ta-IN" }, | ||||
|   { "languageCode": "te-IN" }, | ||||
|   { "languageCode": "th-TH" }, | ||||
|   { "languageCode": "tr-TR" }, | ||||
|   { "languageCode": "uk-UA" }, | ||||
|   { "languageCode": "vi-VN" }, | ||||
|   { "languageCode": "yue-HK" } | ||||
| ] | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										10
									
								
								package.json
								
								
								
								
							
							
						
						
									
										10
									
								
								package.json
								
								
								
								
							|  | @ -11,6 +11,8 @@ | |||
|   "license": "ISC", | ||||
|   "dependencies": { | ||||
|     "@google-cloud/language": "^6.1.0", | ||||
|     "@google-cloud/speech": "^6.0.2", | ||||
|     "@google-cloud/storage": "^7.4.0", | ||||
|     "@google-cloud/text-to-speech": "^5.0.1", | ||||
|     "bcryptjs": "^2.4.3", | ||||
|     "cookie-parser": "^1.4.5", | ||||
|  | @ -21,14 +23,20 @@ | |||
|     "express-fileupload": "^1.2.1", | ||||
|     "express-mongo-sanitize": "^2.1.0", | ||||
|     "express-rate-limit": "^5.4.1", | ||||
|     "fluent-ffmpeg": "^2.1.2", | ||||
|     "google-gax": "^4.0.5", | ||||
|     "google-proto-files": "^4.0.0", | ||||
|     "google-protobuf": "^3.21.2", | ||||
|     "helmet": "^4.6.0", | ||||
|     "http-status-codes": "^2.1.4", | ||||
|     "joi": "^17.4.0", | ||||
|     "mongoose": "^7.3.1", | ||||
|     "morgan": "^1.10.0", | ||||
|     "multer": "^1.4.5-lts.1", | ||||
|     "protobufjs": "^7.2.5", | ||||
|     "swagger-ui-express": "^4.1.6", | ||||
|     "validator": "^13.6.0", | ||||
|     "xss-clean": "^0.1.1", | ||||
|     "swagger-ui-express": "^4.1.6", | ||||
|     "yamljs": "^0.3.0" | ||||
|   }, | ||||
|   "devDependencies": { | ||||
|  |  | |||
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 127 KiB | 
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 205 KiB | 
|  | @ -1,11 +1,14 @@ | |||
| const express = require('express') | ||||
| const router = express.Router() | ||||
| const { authorization, } = require('../middleware/authentication') | ||||
| 
 | ||||
| const { getSentiment, getAudioFromText, getVoiceConfig } = require('../controllers/naturalLanguageController') | ||||
| const { audioUpload } = require("../utils") | ||||
| const { getSentiment, getAudioFromText, getTextFromAudio, getVoiceConfig, uploadAudioToTranscript, getJobStatus } = require('../controllers/naturalLanguageController') | ||||
| 
 | ||||
| router.route('/sentiment').post(authorization, getSentiment) | ||||
| router.route('/text-to-speech').get(getAudioFromText) | ||||
| router.route('/speech-to-text').post(audioUpload.single('audio'), getTextFromAudio) | ||||
| router.route('/upload-audio-to-transcript').post(audioUpload.single('audio'), uploadAudioToTranscript) | ||||
| router.route('/query-job-status').get(getJobStatus) | ||||
| router.route('/voice-config').get(getVoiceConfig) | ||||
| 
 | ||||
| module.exports = router | ||||
|  |  | |||
|  | @ -2,6 +2,7 @@ openapi: 3.0.0 | |||
| info: | ||||
|   title: Natural Language API | ||||
|   contact: {} | ||||
|   description: This API describes the endpoints and parameters to use resources from google cloud api. | ||||
|   version: '1.0' | ||||
| servers: | ||||
| - url: http://localhost:6001/api/v1/nl/ | ||||
|  |  | |||
|  | @ -0,0 +1,25 @@ | |||
| const multer = require('multer') | ||||
| const path = require('path') | ||||
| 
 | ||||
| //Destination to store the file
 | ||||
// Extensions accepted as audio uploads (matched case-insensitively at the end of the name).
const ACCEPTED_AUDIO_EXTENSIONS = /\.(mp3|wav|ogg|flac|aac|wma|m4a|mp4|webm|opus|mpeg)$/i

// Disk storage: files land in `public/uploads` under a generated name that
// keeps the extension of the original file.
const audioStorage = multer.diskStorage({
    destination(req, file, cb) {
        cb(null, `public/uploads`)
    },
    filename(req, file, cb) {
        const generatedPrefix = `${Date.now()}${Math.floor(Math.random() * 1000)}`
        cb(null, `${generatedPrefix}${path.extname(file.originalname)}`)
    }
})

// Multer instance that rejects uploads whose name lacks a known audio extension.
const audioUpload = multer({
    storage: audioStorage,
    fileFilter(req, file, cb) {
        const looksLikeAudio = ACCEPTED_AUDIO_EXTENSIONS.test(file.originalname)
        if (looksLikeAudio) {
            cb(undefined, true)
            return
        }
        return cb(new Error('Invalid file type. Send only an audio file!'))
    }

})
| 
 | ||||
| module.exports = audioUpload  | ||||
|  | @ -0,0 +1,35 @@ | |||
| // Imports the Google Cloud client library
 | ||||
| const { Storage } = require('@google-cloud/storage') | ||||
| 
 | ||||
/**
 * Uploads a local file to a Google Cloud Storage bucket.
 *
 * Failures are logged and reported through the return value rather than thrown.
 *
 * @param {string} bucketName - Target bucket.
 * @param {string} filePath - Path of the local file to upload.
 * @param {string} destFileName - Object name to use inside the bucket.
 * @returns {Promise<boolean>} `true` when the upload succeeded, `false` otherwise.
 */
async function audioUploadToBucket(bucketName, filePath, destFileName) {
  // Creates a client
  const storage = new Storage()

  try {
    const uploadOptions = { destination: destFileName }
    await storage.bucket(bucketName).upload(filePath, uploadOptions)
    console.log(`${filePath} uploaded to ${bucketName}`)
    return true
  } catch (uploadError) {
    console.error(uploadError)
    return false
  }
}
| 
 | ||||
|   | ||||
| 
 | ||||
| module.exports = audioUploadToBucket | ||||
|  | @ -0,0 +1,16 @@ | |||
| const ffmpeg = require('fluent-ffmpeg') | ||||
| 
 | ||||
/**
 * Converts an audio file to 16-bit PCM, 16 kHz, mono using ffmpeg.
 *
 * @param {string} inputFile - Path of the audio file to convert.
 * @param {string} outputFile - Path where the converted file is written.
 * @returns {Promise<string>} Resolves with `outputFile` once ffmpeg finishes;
 *   rejects with the error reported by ffmpeg.
 */
async function convertToLINEAR16(inputFile, outputFile) {
    return new Promise((resolve, reject) => {
        const onFinished = () => resolve(outputFile)
        const onFailed = (err) => reject(err)

        ffmpeg(inputFile)
            .audioCodec('pcm_s16le') // LINEAR16 = signed 16-bit little-endian PCM
            .audioFrequency(16000)   // 16,000 Hz sample rate
            .audioChannels(1)        // mono
            .on('end', onFinished)
            .on('error', onFailed)
            .save(outputFile)
    })
}
| 
 | ||||
| module.exports = convertToLINEAR16 | ||||
|   | ||||
|  | @ -0,0 +1,17 @@ | |||
| const ffmpeg = require('fluent-ffmpeg') | ||||
| 
 | ||||
/**
 * Reads the duration of an audio file through ffprobe.
 *
 * @param {string} filePath - Path of the audio file to inspect.
 * @returns {Promise<number>} Duration rounded to the nearest whole second.
 *   Rejects with the ffprobe error, or with an Error when the probed metadata
 *   does not contain a numeric duration.
 */
async function getAudioDuration(filePath) {
    return new Promise((resolve, reject) => {
        ffmpeg.ffprobe(filePath, (err, metadata) => {
            if (err) {
                reject(err)
                return
            }

            // Missing or non-numeric durations would otherwise surface as NaN
            // (or as an exception thrown inside this callback, outside the promise).
            const seconds = Math.round(Number(metadata?.format?.duration))

            if (!Number.isFinite(seconds)) {
                reject(new Error(`Unable to determine the duration of ${filePath}`))
                return
            }

            resolve(seconds)
        })
    })
}
| 
 | ||||
| module.exports = getAudioDuration | ||||
| 
 | ||||
| 
 | ||||
|  | @ -5,10 +5,21 @@ | |||
| const sentiment = require('./sentiment') | ||||
| const convertTextToSpeech = require('./textToSpeech') | ||||
| const listVoice = require('./listVoice') | ||||
| const convertAudioToLinear16 = require('./convertAudioToLinear16') | ||||
| const getAudioDuration = require('./getAudioDuration') | ||||
| const audioUploadToBucket = require('./audioUploadToBucket') | ||||
| const audioUpload = require('./audioUpload') | ||||
| const speechToText = require('./speechToText') | ||||
| const speechToTextJob = require('./speechToTextJob')  | ||||
| 
 | ||||
| module.exports = { | ||||
|     sentiment, | ||||
|     convertTextToSpeech, | ||||
|     listVoice | ||||
|     listVoice, | ||||
|     convertAudioToLinear16, | ||||
|     getAudioDuration, | ||||
|     audioUploadToBucket, | ||||
|     audioUpload, | ||||
|     speechToText, | ||||
|     speechToTextJob | ||||
| } | ||||
|   | ||||
|  | @ -0,0 +1,67 @@ | |||
| // Imports the Google Cloud client library
 | ||||
| const speech = require('@google-cloud/speech') | ||||
| const { StatusCodes } = require("http-status-codes")  | ||||
| const path = require('path') | ||||
| const fs = require('fs') | ||||
|  const getAudioDuration = require('./getAudioDuration') | ||||
| const audioUploadToBucket = require('./audioUploadToBucket') | ||||
| 
 | ||||
/**
 * Transcribes an audio file with Google Speech-to-Text and waits for the result.
 *
 * Audio shorter than 60 seconds is sent inline as base64; longer audio is first
 * uploaded to `bucket` and referenced by its `gs://` URI. Expected failures are
 * reported through the returned object instead of being thrown.
 *
 * @param {string} filename - Path of the audio file to transcribe.
 * @param {string} [languageCode='pt-BR'] - Language of the audio.
 * @param {string} [bucket='speect-to-text-bucket'] - Bucket used for long audio.
 * @param {number} [sampleRateHertz=16000] - Sample rate of the audio.
 * @param {string} [encoding='LINEAR16'] - Encoding of the audio.
 * @returns {Promise<{msg: string, status: number, transcription?: string}>}
 *   `transcription` is only present on success.
 */
async function speechToText(filename, languageCode = 'pt-BR', bucket = 'speect-to-text-bucket', sampleRateHertz = 16000, encoding = 'LINEAR16') {

    const seconds = await getAudioDuration(filename)

    // An unknown duration must not fall through and be reported as an upload problem.
    if (!Number.isFinite(seconds)) {
        return { msg: 'Could not determine the audio duration', status: StatusCodes.BAD_REQUEST }
    }

    if (seconds >= 28800) {
        return { msg: 'Audio file is higher than 480 minute', status: StatusCodes.BAD_REQUEST }
    }

    let audio

    if (seconds < 60) {
        const bytes = await fs.promises.readFile(filename)
        audio = { content: bytes.toString('base64') }
    } else {
        const objectName = path.basename(filename)
        const uploaded = await audioUploadToBucket(bucket, filename, objectName)

        if (!uploaded) {
            return { msg: `Error on try upload the file to google cloud bucket(${bucket}) storage`, status: StatusCodes.INTERNAL_SERVER_ERROR }
        }

        audio = { uri: `gs://${bucket}/${objectName}` }
    }

    const client = new speech.SpeechClient()

    const request = {
        config: { encoding, sampleRateHertz, languageCode },
        audio,
    }

    try {
        // Detects speech in the audio file. This creates a recognition job that you
        // can wait for now, or get its result later.
        const [operation] = await client.longRunningRecognize(request)
        // Get a Promise representation of the final result of the job
        const [response] = await operation.promise()
        const transcription = response.results
            .map(result => result.alternatives[0].transcript)
            .join('\n')
        console.log(`Transcription: ${transcription}`)

        return { msg: `Transcript success`, status: StatusCodes.OK, transcription }
    } catch (error) {
        console.log('ERROR ON TRY TRANSCRIPT: ', error)
        return { msg: `Error on try transcript the file`, status: StatusCodes.INTERNAL_SERVER_ERROR }
    }
}
| 
 | ||||
| module.exports = speechToText | ||||
| 
 | ||||
| 
 | ||||
|  | @ -0,0 +1,64 @@ | |||
| // Imports the Google Cloud client library
 | ||||
| const speech = require('@google-cloud/speech') | ||||
| const path = require('path') | ||||
| const fs = require('fs') | ||||
| const getAudioDuration = require('./getAudioDuration') | ||||
| const audioUploadToBucket = require('./audioUploadToBucket')  | ||||
| const { StatusCodes } = require("http-status-codes") | ||||
| 
 | ||||
| 
 | ||||
/**
 * Starts a Google Speech-to-Text recognition job without waiting for its result.
 *
 * Audio shorter than 60 seconds is sent inline as base64; longer audio is first
 * uploaded to `bucket` and referenced by its `gs://` URI. Expected failures are
 * reported through the returned object instead of being thrown.
 *
 * @param {string} filename - Path of the audio file to transcribe.
 * @param {string} [languageCode='pt-BR'] - Language of the audio.
 * @param {string} [bucket='speect-to-text-bucket'] - Bucket used for long audio.
 * @param {number} [sampleRateHertz=16000] - Sample rate of the audio.
 * @param {string} [encoding='LINEAR16'] - Encoding of the audio.
 * @returns {Promise<{msg: string, status: number, operationName?: string}>}
 *   `operationName` is only present when the job was created.
 */
async function speechToTextJob(filename, languageCode = 'pt-BR', bucket = 'speect-to-text-bucket', sampleRateHertz = 16000, encoding = 'LINEAR16') {

    const seconds = await getAudioDuration(filename)

    // An unknown duration must not fall through and be reported as an upload problem.
    if (!Number.isFinite(seconds)) {
        return { msg: 'Could not determine the audio duration', status: StatusCodes.BAD_REQUEST }
    }

    if (seconds >= 28800) {
        return { msg: 'Audio file is higher than 480 minute', status: StatusCodes.BAD_REQUEST }
    }

    let audio

    if (seconds < 60) {
        const bytes = await fs.promises.readFile(filename)
        audio = { content: bytes.toString('base64') }
    } else {
        const objectName = path.basename(filename)
        const uploaded = await audioUploadToBucket(bucket, filename, objectName)

        if (!uploaded) {
            return { msg: `Error on try upload the file to google cloud bucket(${bucket}) storage`, status: StatusCodes.INTERNAL_SERVER_ERROR }
        }

        audio = { uri: `gs://${bucket}/${objectName}` }
    }

    const client = new speech.SpeechClient()

    const request = {
        config: { encoding, sampleRateHertz, languageCode },
        audio,
    }

    try {
        // Detects speech in the audio file. This creates a recognition job that you
        // can wait for now, or get its result later.
        const [operation] = await client.longRunningRecognize(request)

        console.log('===========> operationName: ', operation.name)

        return { msg: `success`, status: StatusCodes.OK, operationName: operation.name }
    } catch (error) {
        console.log('ERROR ON TRY TRANSCRIPT: ', error)
        return { msg: `Error on try transcript the file`, status: StatusCodes.INTERNAL_SERVER_ERROR }
    }
}
| 
 | ||||
| module.exports = speechToTextJob | ||||
| 
 | ||||
| 
 | ||||
		Loading…
	
		Reference in New Issue