From 7e75d15a27765f8e6263fee07d03e5a7fe060225 Mon Sep 17 00:00:00 2001 From: adriano Date: Wed, 24 Sep 2025 17:48:51 -0300 Subject: [PATCH] feat: updated diarization report using llm like gemini flash 2.5 --- backend/app/services/report_service.py | 50 +++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/backend/app/services/report_service.py b/backend/app/services/report_service.py index 10715e4..e2d9d84 100644 --- a/backend/app/services/report_service.py +++ b/backend/app/services/report_service.py @@ -129,6 +129,34 @@ class TranscriptionReportService: # Executa agregação principal self.mongo_results = list(collection.aggregate(pipeline)) self.unique_ids = [doc["_id"] for doc in self.mongo_results] + + # TEST + # print("======> self.mongo_results: ", self.mongo_results) + + # if rowMongo := next((m for m in self.mongo_results), None): + # print("============>", rowMongo["totalCost"]) + + # stt = rowMongo.get('usageByType', {}).get('stt', {}) + + # if not stt: + # stt = rowMongo.get('usageByType', {}).get('input-audio', {}) + + # # minutes + # usageMinute = round(stt.get('usage', 0) / 1920, 2) + + # # seconds + # token_by_second = 1920 / 60 + # usageSeconds = round(stt.get('usage', 0) / token_by_second) + + + # print("======> stt_model: ", stt.get('product', 'unknown')) + # print("======> stt_provider: ", stt.get('provider', 'unknown')) + # print("======> stt_cost: ", rowMongo["totalCost"]) + # print("======> stt_usage_minute: ", f"{usageMinute:.2f}") + # print("======> stt_usageSeconds: ", f"{usageSeconds}") + + # exit(1) + # END TEST # Pipeline para contagem total count_pipeline = [ @@ -192,10 +220,24 @@ class TranscriptionReportService: row["llm_provider"] = token_output.get('provider','unknown') stt = rowMongo.get('usageByType', {}).get('stt',{}) - row["stt_model"] = stt.get('product', 'unknown') - row["stt_provider"] = stt.get('provider', 'unknown') - row["stt_cost"] = stt.get('usageCost', 0) - row["stt_usage"] = 
stt.get('usage', 0) + + if not stt: + stt = rowMongo.get('usageByType', {}).get('input-audio',{}) + + # seconds + token_by_second = 1920 / 60 # Gemini 2.5 Flash audio input is billed at 1920 tokens per minute, i.e. 32 tokens per second + usageSeconds = round(stt.get('usage', 0) / token_by_second) + + row["stt_model"] = stt.get('product', 'unknown') + row["stt_provider"] = stt.get('provider', 'unknown') + row["stt_cost"] = rowMongo["totalCost"] + row["stt_usage"] = usageSeconds + + else: + row["stt_model"] = stt.get('product', 'unknown') + row["stt_provider"] = stt.get('provider', 'unknown') + row["stt_cost"] = stt.get('usageCost', 0) + row["stt_usage"] = stt.get('usage', 0) row["total_min"] = f"{(int(row['total_billsec']) / 60):.2f}"