feat: update diarization report to support LLM-based transcription models such as Gemini 2.5 Flash

fix/worong-type
adriano 2025-09-24 17:48:51 -03:00
parent d2a65af57b
commit 7e75d15a27
1 changed file with 46 additions and 4 deletions

View File

@ -129,6 +129,34 @@ class TranscriptionReportService:
# Executa agregação principal
self.mongo_results = list(collection.aggregate(pipeline))
self.unique_ids = [doc["_id"] for doc in self.mongo_results]
# TEST
# print("======> self.mongo_results: ", self.mongo_results)
# if rowMongo := next((m for m in self.mongo_results), None):
# print("============>", rowMongo["totalCost"])
# stt = rowMongo.get('usageByType', {}).get('stt', {})
# if not stt:
# stt = rowMongo.get('usageByType', {}).get('input-audio', {})
# # minutes
# usageMinute = round(stt.get('usage', 0) / 1920, 2)
# # seconds
# token_by_second = 1920 / 60
# usageSeconds = round(stt.get('usage', 0) / token_by_second)
# print("======> stt_model: ", stt.get('product', 'unknown'))
# print("======> stt_provider: ", stt.get('provider', 'unknown'))
# print("======> stt_cost: ", rowMongo["totalCost"])
# print("======> stt_usage_minute: ", f"{usageMinute:.2f}")
# print("======> stt_usageSeconds: ", f"{usageSeconds}")
# exit(1)
# END TEST
# Pipeline para contagem total
count_pipeline = [
@ -192,10 +220,24 @@ class TranscriptionReportService:
row["llm_provider"] = token_output.get('provider','unknown')
stt = rowMongo.get('usageByType', {}).get('stt',{})
row["stt_model"] = stt.get('product', 'unknown')
row["stt_provider"] = stt.get('provider', 'unknown')
row["stt_cost"] = stt.get('usageCost', 0)
row["stt_usage"] = stt.get('usage', 0)
if not stt:
stt = rowMongo.get('usageByType', {}).get('input-audio',{})
# seconds
token_by_second = 1920 / 60 # Gemini 2.5 Flash audio input is billed at 1920 tokens per minute, i.e. 32 tokens per second
usageSeconds = round(stt.get('usage', 0) / token_by_second)
row["stt_model"] = stt.get('product', 'unknown')
row["stt_provider"] = stt.get('provider', 'unknown')
row["stt_cost"] = rowMongo["totalCost"]
row["stt_usage"] = usageSeconds
else:
row["stt_model"] = stt.get('product', 'unknown')
row["stt_provider"] = stt.get('provider', 'unknown')
row["stt_cost"] = stt.get('usageCost', 0)
row["stt_usage"] = stt.get('usage', 0)
row["total_min"] = f"{(int(row['total_billsec']) / 60):.2f}"