Agent Metrics and Analytics
Buddy AI's comprehensive metrics system provides detailed insights into agent performance, user satisfaction, system efficiency, and behavioral patterns.
📊 Metrics Overview
The metrics system tracks multiple dimensions of agent performance to enable optimization, debugging, and quality assurance:
- Performance Metrics: Response time, throughput, resource utilization
- Quality Metrics: Accuracy, completeness, relevance, coherence
- User Experience: Satisfaction scores, engagement levels, task success
- System Health: Error rates, availability, scalability metrics
- Business Impact: Cost efficiency, goal achievement, ROI
graph TD
A[Metrics System] --> B[Performance]
A --> C[Quality]
A --> D[User Experience]
A --> E[System Health]
A --> F[Business Impact]
B --> B1[Response Time]
B --> B2[Throughput]
B --> B3[Resource Usage]
C --> C1[Accuracy]
C --> C2[Relevance]
C --> C3[Completeness]
D --> D1[Satisfaction]
D --> D2[Engagement]
D --> D3[Task Success]
E --> E1[Error Rates]
E --> E2[Availability]
E --> E3[Scalability]
F --> F1[Cost Efficiency]
F --> F2[Goal Achievement]
F --> F3[ROI]
🚀 Quick Start
Basic Metrics Collection
from buddy import Agent
from buddy.models.openai import OpenAIChat
from buddy.eval.metrics import MetricsCollector

# Configure the collector up front, then hand it to the agent so every
# run is measured automatically.
collector = MetricsCollector(
    track_performance=True,
    track_quality=True,
    track_user_experience=True,
    storage_backend="local",  # or "prometheus", "influxdb", "cloudwatch"
)
agent = Agent(model=OpenAIChat(), metrics_collector=collector)

# Any task executed through the agent feeds the collector.
result = agent.run("Analyze this quarterly report")

# Pull an aggregate snapshot of what was recorded.
metrics = agent.get_metrics_summary()
print(f"Response Time: {metrics['performance']['avg_response_time']:.2f}s")
print(f"Quality Score: {metrics['quality']['overall_score']:.2f}")
print(f"User Satisfaction: {metrics['user_experience']['satisfaction']:.2f}")
Real-time Monitoring
from buddy.eval.metrics import MetricsDashboard

# Live web dashboard over the agent's metrics, refreshing every 5 seconds.
dashboard = MetricsDashboard(
    agents=[agent],
    update_interval=5,  # seconds
    web_interface=True,
    port=8080,
)
dashboard.start()
# Served at http://localhost:8080 once started.
📈 Performance Metrics
Response Time Tracking
from buddy.eval.performance import ResponseTimeTracker

# Latency tracker: chosen percentiles over several rolling windows,
# segmented by model, task type, and user type.
response_tracker = ResponseTimeTracker(
    percentiles=[50, 90, 95, 99],
    time_windows=["1m", "5m", "1h", "24h"],
    breakdown_by=["model", "task_type", "user_type"],
)
agent.add_metric_tracker(response_tracker)

# Summarize the latencies observed so far.
performance_report = response_tracker.generate_report()
print("Response Time Analysis:")
print(f" P50: {performance_report['p50']:.3f}s")
print(f" P90: {performance_report['p90']:.3f}s")
print(f" P95: {performance_report['p95']:.3f}s")
print(f" P99: {performance_report['p99']:.3f}s")

# Per-model averages.
for model, times in performance_report['by_model'].items():
    print(f" {model}: {times['avg']:.3f}s (avg)")
Throughput Analysis
from buddy.eval.performance import ThroughputAnalyzer

# Request-rate tracker that also watches concurrency and queue backlog.
throughput_analyzer = ThroughputAnalyzer(
    time_windows=["1m", "5m", "15m", "1h"],
    track_concurrent_requests=True,
    track_queue_depth=True,
)
agent.add_metric_tracker(throughput_analyzer)

# Snapshot of current throughput.
throughput_data = throughput_analyzer.get_current_stats()
print(f"Requests/minute: {throughput_data['rpm']:.1f}")
print(f"Requests/hour: {throughput_data['rph']:.1f}")
print(f"Concurrent requests: {throughput_data['concurrent']}")
print(f"Queue depth: {throughput_data['queue_depth']}")
Resource Utilization
from buddy.eval.performance import ResourceMonitor

# Watch every resource class and alert when usage crosses a threshold.
resource_monitor = ResourceMonitor(
    track_memory=True,
    track_cpu=True,
    track_gpu=True,
    track_network=True,
    track_storage=True,
    alert_thresholds={
        "memory_usage": 0.8,  # 80%
        "cpu_usage": 0.7,  # 70%
        "gpu_memory": 0.9,  # 90%
        "disk_space": 0.8,  # 80%
    },
)
agent.add_metric_tracker(resource_monitor)

# Current utilization snapshot.
resources = resource_monitor.get_current_usage()
print(f"Memory: {resources['memory']['used_gb']:.1f}GB / {resources['memory']['total_gb']:.1f}GB")
print(f"CPU: {resources['cpu']['usage_percent']:.1f}%")
print(f"GPU Memory: {resources['gpu']['memory_used_gb']:.1f}GB")
🎯 Quality Metrics
Accuracy Assessment
from buddy.eval.accuracy import AccuracyEvaluator

# Multi-method accuracy evaluation scored along five quality dimensions.
accuracy_evaluator = AccuracyEvaluator(
    evaluation_methods=[
        "ground_truth_comparison",
        "expert_human_review",
        "automated_fact_checking",
        "consistency_validation",
        "benchmark_comparison",
    ],
    quality_dimensions=[
        "factual_correctness",
        "logical_consistency",
        "completeness",
        "relevance",
        "clarity",
    ],
)
agent.add_metric_tracker(accuracy_evaluator)

# Score one response against the configured dimensions.
response = agent.run("What are the benefits of renewable energy?")
quality_scores = accuracy_evaluator.evaluate_response(
    response=response,
    ground_truth=None,  # supply when a reference answer exists
    context={"topic": "renewable_energy", "user_level": "general"},
)
print("Quality Assessment:")
for dimension, score in quality_scores.items():
    print(f" {dimension}: {score:.3f}")
print(f"Overall Quality Score: {quality_scores['overall']:.3f}")
Relevance Scoring
from buddy.eval.accuracy import RelevanceScorer

# Weighted relevance scoring across four complementary methods.
relevance_scorer = RelevanceScorer(
    scoring_methods=[
        "semantic_similarity",
        "keyword_matching",
        "intent_alignment",
        "context_appropriateness",
    ],
    relevance_factors={
        "query_answer_alignment": 0.4,
        "context_utilization": 0.3,
        "user_intent_fulfillment": 0.3,
    },
)

# How well does the response answer the query in context?
relevance_score = relevance_scorer.score_relevance(
    query="How do I optimize database performance?",
    response=response,
    context={"user_expertise": "intermediate", "database_type": "postgresql"},
)
print(f"Relevance Score: {relevance_score['overall']:.3f}")
print("Component Scores:")
for component, score in relevance_score['components'].items():
    print(f" {component}: {score:.3f}")
Completeness Analysis
from buddy.eval.accuracy import CompletenessAnalyzer

# Check responses against explicit coverage criteria.
completeness_analyzer = CompletenessAnalyzer(
    completeness_criteria=[
        "all_aspects_covered",
        "sufficient_detail",
        "practical_examples",
        "actionable_steps",
        "edge_cases_addressed",
    ],
)

# Compare the response against the aspects we expect it to cover.
completeness_score = completeness_analyzer.analyze_completeness(
    query="Explain machine learning algorithms",
    response=response,
    expected_aspects=[
        "supervised_learning",
        "unsupervised_learning",
        "reinforcement_learning",
        "algorithm_examples",
        "use_cases",
        "advantages_disadvantages",
    ],
)
print(f"Completeness Score: {completeness_score['overall']:.3f}")
print("Missing Aspects:")
for aspect in completeness_score['missing_aspects']:
    print(f" - {aspect}")
😊 User Experience Metrics
Satisfaction Tracking
from buddy.eval.metrics import SatisfactionTracker

# Blend explicit ratings with implicit behavioral signals.
satisfaction_tracker = SatisfactionTracker(
    collection_methods=[
        "explicit_feedback",  # direct user ratings
        "implicit_signals",  # behavioral indicators
        "follow_up_questions",  # need for clarification
        "task_completion_rate",  # success in achieving goals
    ],
    satisfaction_dimensions=[
        "helpfulness",
        "clarity",
        "completeness",
        "timeliness",
        "appropriateness",
    ],
)
agent.add_metric_tracker(satisfaction_tracker)

# Record one explicit rating for a finished interaction.
satisfaction_tracker.record_explicit_feedback(
    interaction_id="12345",
    ratings={
        "helpfulness": 4.5,
        "clarity": 4.0,
        "completeness": 3.5,
        "timeliness": 5.0,
        "appropriateness": 4.0,
    },
    overall_rating=4.2,
    comments="Very helpful response, could use more examples",
)

# Weekly trend report.
satisfaction_report = satisfaction_tracker.generate_report(timeframe="last_7_days")
print(f"Average Satisfaction: {satisfaction_report['avg_overall']:.2f}")
print("Dimension Breakdown:")
for dimension, score in satisfaction_report['by_dimension'].items():
    print(f" {dimension}: {score:.2f}")
Engagement Analysis
from buddy.eval.metrics import EngagementAnalyzer

# Engagement analysis across behavioral indicators, broken down by user segment.
engagement_analyzer = EngagementAnalyzer(
    engagement_indicators=[
        "session_duration",
        "messages_per_session",
        "return_visits",
        "feature_usage_depth",
        "proactive_questions",
        "task_completion_rate",
    ],
    user_segmentation=[
        "new_users",
        "regular_users",
        "power_users",
        "enterprise_users",
    ],
)

# Overall engagement snapshot.
engagement_data = engagement_analyzer.get_engagement_metrics()
print("Engagement Analysis:")
print(f" Avg Session Duration: {engagement_data['avg_session_duration']:.1f} minutes")
print(f" Messages per Session: {engagement_data['avg_messages_per_session']:.1f}")
print(f" Return Rate: {engagement_data['return_rate']:.1%}")
print(f" Feature Adoption: {engagement_data['feature_adoption_rate']:.1%}")

# Per-segment breakdown.
for segment, metrics in engagement_data['by_segment'].items():
    print(f"\n{segment}:")  # fixed: "\\n" printed a literal backslash-n instead of a newline
    print(f" Sessions: {metrics['session_count']}")
    print(f" Avg Duration: {metrics['avg_duration']:.1f} min")
    print(f" Satisfaction: {metrics['avg_satisfaction']:.2f}")
Task Success Rate
from buddy.eval.metrics import TaskSuccessTracker

# Per-task-type success criteria: each factor carries a minimum score.
task_success_tracker = TaskSuccessTracker(
    success_criteria={
        "information_retrieval": {
            "relevant_info_provided": 0.8,
            "user_confirms_helpfulness": 0.7,
        },
        "problem_solving": {
            "solution_provided": 1.0,
            "solution_works": 0.9,
            "user_satisfaction": 0.8,
        },
        "creative_tasks": {
            "output_generated": 1.0,
            "meets_requirements": 0.8,
            "user_approval": 0.7,
        },
    },
)

# Record the outcome of one completed task.
task_result = task_success_tracker.track_task(
    task_type="problem_solving",
    task_description="Help debug Python code",
    outcome_indicators={
        "solution_provided": True,
        "solution_works": True,
        "user_satisfaction": 4.5,
    },
)
print(f"Task Success: {task_result['success']}")
print(f"Success Score: {task_result['success_score']:.3f}")
print(f"Success Factors Met: {task_result['factors_met']}/{task_result['total_factors']}")
🏥 System Health Metrics
Error Rate Monitoring
from buddy.eval.reliability import ErrorMonitor

# Categorized error tracking with alerting thresholds.
error_monitor = ErrorMonitor(
    error_categories=[
        "model_errors",
        "network_errors",
        "timeout_errors",
        "rate_limit_errors",
        "validation_errors",
        "system_errors",
    ],
    alert_thresholds={
        "error_rate_5m": 0.05,  # 5% error rate in 5 minutes
        "error_rate_1h": 0.02,  # 2% error rate in 1 hour
        "consecutive_errors": 5,  # 5 consecutive errors
    },
)
agent.add_metric_tracker(error_monitor)

# Current error statistics.
error_stats = error_monitor.get_error_statistics()
print("Error Rate Analysis:")
print(f" Overall Error Rate: {error_stats['overall_rate']:.3%}")
print(f" Error Rate (last 5m): {error_stats['rate_5m']:.3%}")
print(f" Error Rate (last 1h): {error_stats['rate_1h']:.3%}")
print("\nError Breakdown:")  # fixed: "\\n" printed a literal backslash-n instead of a newline
for category, count in error_stats['by_category'].items():
    print(f" {category}: {count} errors")
Availability Tracking
from buddy.eval.reliability import AvailabilityTracker

# 99.9% uptime target over a 30-day rolling window, backed by health checks.
availability_tracker = AvailabilityTracker(
    uptime_requirements={
        "target_uptime": 0.999,  # 99.9% uptime target
        "measurement_window": "30d",  # 30-day rolling window
        "downtime_threshold": 5,  # seconds to consider downtime
    },
    health_checks=[
        "model_response_check",
        "api_endpoint_check",
        "database_connectivity",
        "external_service_check",
    ],
)

# Uptime report against the SLA.
availability_stats = availability_tracker.get_availability_stats()
print("Availability Report:")
print(f" Current Uptime: {availability_stats['current_uptime']:.4%}")
print(f" 30-day Uptime: {availability_stats['uptime_30d']:.4%}")
print(f" Downtime (30d): {availability_stats['downtime_minutes']:.1f} minutes")
print(f" SLA Status: {'✅ Meeting' if availability_stats['meeting_sla'] else '❌ Missing'} target")
💰 Business Impact Metrics
Cost Analysis
from buddy.eval.performance import CostAnalyzer

# Cost tracking by category, attributable to user, task type, model, and time.
cost_analyzer = CostAnalyzer(
    cost_categories=[
        "model_api_calls",
        "compute_resources",
        "storage_costs",
        "bandwidth_usage",
        "external_services",
    ],
    cost_attribution={
        "by_user": True,
        "by_task_type": True,
        "by_model": True,
        "by_time_period": True,
    },
)

# 30-day cost report.
cost_report = cost_analyzer.generate_cost_report(period="last_30_days")
print("Cost Analysis (Last 30 Days):")
print(f" Total Cost: ${cost_report['total_cost']:.2f}")
print(f" Cost per Request: ${cost_report['cost_per_request']:.4f}")
print(f" Cost per User: ${cost_report['cost_per_user']:.2f}")
print("\nCost Breakdown:")  # fixed: "\\n" printed a literal backslash-n instead of a newline
for category, cost in cost_report['by_category'].items():
    percentage = (cost / cost_report['total_cost']) * 100
    print(f" {category}: ${cost:.2f} ({percentage:.1f}%)")
ROI Analysis
from buddy.eval.performance import ROIAnalyzer

# ROI model: value generated versus the total cost of running the system.
roi_analyzer = ROIAnalyzer(
    value_metrics=[
        "time_saved_per_user",
        "tasks_automated",
        "productivity_increase",
        "error_reduction",
        "customer_satisfaction_improvement",
    ],
    cost_factors=[
        "development_cost",
        "operational_cost",
        "maintenance_cost",
        "infrastructure_cost",
    ],
)

# Quarterly ROI under explicit value assumptions.
roi_analysis = roi_analyzer.calculate_roi(
    time_period="quarterly",
    value_assumptions={
        "hourly_wage": 50,  # $/hour
        "time_saved_per_user": 2.5,  # hours/week
        "users_count": 1000,  # active users
        "productivity_multiplier": 1.2,  # 20% productivity increase
    },
)
print("ROI Analysis:")
print(f" Total Value Generated: ${roi_analysis['total_value']:,.2f}")
print(f" Total Costs: ${roi_analysis['total_costs']:,.2f}")
print(f" Net Benefit: ${roi_analysis['net_benefit']:,.2f}")
print(f" ROI: {roi_analysis['roi_percentage']:.1f}%")
print(f" Payback Period: {roi_analysis['payback_months']:.1f} months")
📊 Advanced Analytics
Predictive Analytics
from buddy.eval.analytics import PredictiveAnalyzer

# Forecasting models evaluated over four horizons.
predictive_analyzer = PredictiveAnalyzer(
    models=[
        "usage_forecasting",
        "performance_prediction",
        "failure_prediction",
        "capacity_planning",
    ],
    prediction_horizons=["1d", "7d", "30d", "90d"],
)

# Current forecasts.
predictions = predictive_analyzer.generate_predictions()
print("Predictive Analysis:")
print(f" Expected Usage (7d): {predictions['usage_7d']:.0f} requests")
print(f" Performance Trend: {predictions['performance_trend']}")
print(f" Capacity Needed (30d): {predictions['capacity_30d']}")
print(f" Risk Assessment: {predictions['risk_level']}")
A/B Testing
from buddy.eval.analytics import ABTestFramework

# Three-way personality experiment at 95% confidence, 1000 users per variant.
ab_test = ABTestFramework(
    test_name="personality_effectiveness",
    variants={
        "control": {"personality_style": "professional"},
        "variant_a": {"personality_style": "friendly"},
        "variant_b": {"personality_style": "casual"},
    },
    metrics=[
        "user_satisfaction",
        "task_completion_rate",
        "engagement_duration",
        "return_rate",
    ],
    sample_size=1000,
    confidence_level=0.95,
)

# Run for two weeks, then report per-variant results.
test_results = ab_test.run_test(duration_days=14)
print("A/B Test Results:")
for variant, results in test_results.items():
    print(f"\n{variant}:")  # fixed: "\\n" printed a literal backslash-n instead of a newline
    print(f" Satisfaction: {results['user_satisfaction']:.2f}")
    print(f" Completion Rate: {results['task_completion_rate']:.1%}")
    print(f" Statistical Significance: {results['significance']}")
Cohort Analysis
from buddy.eval.analytics import CohortAnalyzer

# Group users by signup month and follow each cohort for a year.
cohort_analyzer = CohortAnalyzer(
    cohort_definition="monthly",  # group users by signup month
    metrics=[
        "retention_rate",
        "usage_frequency",
        "feature_adoption",
        "satisfaction_evolution",
    ],
    analysis_period="12_months",
)

# Retention and value per cohort.
cohort_analysis = cohort_analyzer.analyze_cohorts()
print("Cohort Analysis:")
print("Monthly Retention Rates:")
for month, retention in cohort_analysis['retention_by_month'].items():
    print(f" Month {month}: {retention:.1%}")
# fixed: "\\n" printed a literal backslash-n instead of a newline
print(f"\nAverage 6-month Retention: {cohort_analysis['avg_6m_retention']:.1%}")
print(f"User Lifecycle Value: ${cohort_analysis['lifecycle_value']:.2f}")
📈 Custom Metrics
Creating Custom Metrics
from buddy.eval.metrics import BaseMetric

class CustomBusinessMetric(BaseMetric):
    """Track business goals and KPIs as a custom agent metric.

    Goals are registered with a target value; KPI updates feed the
    achievement calculation, which caps each goal at 100%.
    """

    def __init__(self, name: str):
        super().__init__(name)
        self.business_goals = []  # list of {"name", "target", "current"} dicts
        self.kpis = {}  # latest KPI value keyed by KPI name

    def add_business_goal(self, goal_name: str, target_value: float) -> None:
        """Register a business goal to be tracked against target_value."""
        self.business_goals.append({
            "name": goal_name,
            "target": target_value,
            "current": 0.0
        })

    def update_kpi(self, kpi_name: str, value: float) -> None:
        """Record the latest observed value for a KPI."""
        self.kpis[kpi_name] = value

    def calculate_goal_achievement(self) -> dict[str, float]:
        """Return achievement percentage (0-100) for each registered goal."""
        achievements = {}
        for goal in self.business_goals:
            current_value = self.kpis.get(goal["name"], 0.0)
            # Guard against a zero target, which would divide by zero.
            if goal["target"]:
                achievement = (current_value / goal["target"]) * 100
            else:
                achievement = 0.0
            achievements[goal["name"]] = min(achievement, 100.0)
        return achievements

    def get_metric_value(self) -> dict:
        """Bundle goals, raw KPIs, and computed achievements into one payload."""
        return {
            "goals": self.business_goals,
            "kpis": self.kpis,
            "achievements": self.calculate_goal_achievement()
        }

# Register the custom metric with the agent.
business_metric = CustomBusinessMetric("quarterly_goals")
business_metric.add_business_goal("user_satisfaction", 4.5)
business_metric.add_business_goal("cost_per_user", 10.0)
business_metric.add_business_goal("task_success_rate", 0.95)
agent.add_metric_tracker(business_metric)
Metric Aggregation
from buddy.eval.metrics import MetricAggregator

# Roll individual metrics up into composite scores: one weighted average
# and one custom formula.
metric_aggregator = MetricAggregator(
    aggregation_rules=[
        {
            "name": "overall_health_score",
            "inputs": ["performance_score", "quality_score", "satisfaction_score"],
            "weights": [0.3, 0.4, 0.3],
            "formula": "weighted_average",
        },
        {
            "name": "efficiency_index",
            "inputs": ["throughput", "cost_per_request", "error_rate"],
            "formula": "custom",
            "function": lambda t, c, e: (t / c) * (1 - e),
        },
    ],
)

# Compute and display the composites.
aggregated = metric_aggregator.calculate_aggregated_metrics()
print("Aggregated Metrics:")
for metric_name, value in aggregated.items():
    print(f" {metric_name}: {value:.3f}")
🔔 Alerts and Notifications
Metric-based Alerts
from buddy.eval.metrics import AlertSystem

# Threshold alerts routed to email, Slack, and PagerDuty by severity.
alert_system = AlertSystem(
    alerts=[
        {
            "name": "high_error_rate",
            "metric": "error_rate_5m",
            "threshold": 0.05,
            "comparison": "greater_than",
            "severity": "critical",
            "notification_channels": ["email", "slack", "pagerduty"],
        },
        {
            "name": "low_satisfaction",
            "metric": "user_satisfaction",
            "threshold": 3.5,
            "comparison": "less_than",
            "severity": "warning",
            "notification_channels": ["email"],
        },
        {
            "name": "high_cost",
            "metric": "cost_per_request",
            "threshold": 0.10,
            "comparison": "greater_than",
            "severity": "warning",
            "notification_channels": ["slack"],
        },
    ],
    notification_config={
        "email": {"smtp_server": "smtp.company.com"},
        "slack": {"webhook_url": "https://hooks.slack.com/..."},
        "pagerduty": {"integration_key": "..."},
    },
)
agent.set_alert_system(alert_system)
📋 Metrics Reporting
Automated Reports
from buddy.eval.metrics import ReportGenerator

# Scheduled reports (cron syntax) plus on-demand generation.
report_generator = ReportGenerator(
    reports=[
        {
            "name": "daily_performance_summary",
            "schedule": "0 9 * * *",  # daily at 9 AM
            "metrics": ["response_time", "throughput", "error_rate"],
            "format": "html",
            "recipients": ["team@company.com"],
        },
        {
            "name": "weekly_quality_review",
            "schedule": "0 10 * * 1",  # Monday at 10 AM
            "metrics": ["accuracy", "relevance", "completeness", "satisfaction"],
            "format": "pdf",
            "recipients": ["quality-team@company.com"],
        },
    ],
)

# One-off comprehensive report.
report = report_generator.generate_report(
    report_type="comprehensive_analysis",
    time_range="last_30_days",
    include_visualizations=True,
)
print(f"Report generated: {report['file_path']}")
print(f"Key insights: {report['insights']}")
🎯 Best Practices
Metrics Strategy
- Define Clear Objectives: Align metrics with business goals and user needs
- Balance Breadth and Depth: Track comprehensive metrics without overwhelming
- Actionable Insights: Focus on metrics that drive concrete improvements
- Real-time Monitoring: Enable rapid response to issues
- Historical Analysis: Track trends and long-term patterns
Performance Optimization
- Efficient Collection: Minimize overhead of metrics collection
- Smart Sampling: Use sampling for high-volume metrics
- Asynchronous Processing: Avoid blocking operations with metrics
- Storage Optimization: Use appropriate storage backends for different metrics
- Query Performance: Optimize metric queries and aggregations
The metrics and analytics system provides comprehensive visibility into agent performance, enabling data-driven optimization and ensuring high-quality user experiences.