Data Analysis Examples
Examples of building data analysis agents that process, analyze, and generate insights from various data sources.
Sales Data Analyzer
Analyze sales performance and generate business insights:
from erdo import Agent, state
from erdo.actions import llm, codeexec, memory, utils
from erdo.conditions import IsSuccess, GreaterThan, IsError
# Agent that summarises a sales CSV and asks an LLM for business insights.
sales_analyzer = Agent(
    name="sales_data_analyzer",
    description="Analyzes sales data and generates business insights",
)

# Step 1: Load and validate data
# The script below is passed to codeexec.execute as text, so its lines stay
# flush-left. The f-string interpolates `state.data_source`; doubled braces
# are literal dict braces in the generated script.
# NOTE(review): the quality flag compares the total number of missing cells
# with 10% of the row count -- confirm that this is the intended metric.
_sales_preprocess_code = f"""
import pandas as pd
import json
# Load sales data
data = pd.read_csv('{state.data_source}')
# Basic validation
missing_data = data.isnull().sum()
total_revenue = data['amount'].sum()
unique_customers = data['customer_id'].nunique()
result = {{
"total_revenue": float(total_revenue),
"unique_customers": int(unique_customers),
"total_transactions": len(data),
"data_quality": "good" if missing_data.sum() < len(data) * 0.1 else "poor"
}}
print(json.dumps(result))
"""
preprocess_step = sales_analyzer.step(
    codeexec.execute(code=_sales_preprocess_code)
)

# Step 2: Generate insights
# Structured response: two arrays plus a numeric confidence that the
# handlers below branch on.
_sales_insight_schema = {
    "Type": "json_schema",
    "Schema": {
        "type": "object",
        "properties": {
            "insights": {"type": "array"},
            "recommendations": {"type": "array"},
            "confidence": {"type": "number"},
        },
    },
}
analysis_step = sales_analyzer.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze this sales data: {preprocess_step.output.result}",
        response_format=_sales_insight_schema,
    ),
    depends_on=preprocess_step,
)

# Store high-confidence results
_is_confident = IsSuccess() & GreaterThan("confidence", "0.8")
_validated_record = {
    "content": analysis_step.output.response,
    "type": "sales_analysis",
    "tags": ["sales", "validated"],
}
analysis_step.on(_is_confident, memory.store(memory=_validated_record))

# Flag low confidence for review
_needs_review = IsSuccess() & ~GreaterThan("confidence", "0.8")
analysis_step.on(
    _needs_review,
    utils.send_status(
        status="review_needed",
        message="Analysis has low confidence - needs review",
    ),
)
Customer Segmentation
Segment customers based on behavior and value:
# Agent that buckets customers by total spend and asks an LLM to interpret
# the resulting segments.
segmentation_agent = Agent(
    name="customer_segmentation",
    description="Segments customers based on purchase behavior",
)

# Analyze customer data
# The script is passed to codeexec.execute as text, so its lines stay
# flush-left. The f-string interpolates `state.customer_data`; doubled braces
# are literal dict braces in the generated script.
segment_step = segmentation_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import json
# Load customer data
data = pd.read_csv('{state.customer_data}')
# Calculate customer metrics
customer_metrics = data.groupby('customer_id').agg({{
    'amount': ['sum', 'mean', 'count'],
    'date': ['min', 'max']
}}).round(2)
# Simple segmentation
customer_metrics['total_spent'] = customer_metrics[('amount', 'sum')]
# include_lowest=True keeps customers whose total is exactly 0 in the 'Low'
# bucket; by default pd.cut excludes the left edge of the first bin, which
# would leave them unsegmented and missing from the counts.
customer_metrics['segment'] = pd.cut(
    customer_metrics['total_spent'],
    bins=[0, 1000, 5000, float('inf')],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True
)
segment_counts = customer_metrics['segment'].value_counts().to_dict()
result = {{
    "segments": segment_counts,
    "total_customers": len(customer_metrics),
    "avg_customer_value": float(customer_metrics['total_spent'].mean())
}}
print(json.dumps(result))
"""
    )
)

# Generate segment insights
insights_step = segmentation_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze customer segments: {segment_step.output.result}",
        system_prompt="Provide actionable insights for each customer segment",
    ),
    depends_on=segment_step,
)
Financial Trend Analysis
Analyze financial trends and forecasting:
# Agent that aggregates revenue by month, derives a simple trend, and asks
# an LLM for a structured financial assessment.
financial_agent = Agent(
    name="financial_trend_analyzer",
    description="Analyzes financial trends and generates forecasts",
)

# Calculate trends
# The script is passed to codeexec.execute as text, so its lines stay
# flush-left. The f-string interpolates `state.financial_data`; doubled
# braces are literal dict braces in the generated script.
trend_step = financial_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import numpy as np
import json
# Load financial data
data = pd.read_csv('{state.financial_data}')
data['date'] = pd.to_datetime(data['date'])
# Monthly aggregation
monthly = data.groupby(data['date'].dt.to_period('M'))['revenue'].sum()
# Calculate growth rate
growth_rate = monthly.pct_change().mean()
# Simple trend detection over the last three months.
# A window that is neither non-decreasing nor non-increasing (e.g. up then
# down) is reported as "mixed" rather than being labelled "decreasing".
recent_months = monthly.tail(3)
if recent_months.is_monotonic_increasing:
    trend = "increasing"
elif recent_months.is_monotonic_decreasing:
    trend = "decreasing"
else:
    trend = "mixed"
result = {{
    "monthly_growth_rate": float(growth_rate),
    "trend": trend,
    "latest_revenue": float(monthly.iloc[-1]),
    "months_analyzed": len(monthly)
}}
print(json.dumps(result))
"""
    )
)

# Generate financial insights
financial_insights = financial_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze financial trends: {trend_step.output.result}",
        response_format={
            "Type": "json_schema",
            "Schema": {
                "type": "object",
                "properties": {
                    "summary": {"type": "string"},
                    "forecast": {"type": "string"},
                    "risks": {"type": "array"},
                    "opportunities": {"type": "array"},
                },
            },
        },
    ),
    depends_on=trend_step,
)

# Store analysis results
financial_insights.on(
    IsSuccess(),
    memory.store(memory={
        "content": financial_insights.output.response,
        "type": "financial_analysis",
        "tags": ["finance", "trends", state.period],
    }),
)
Data Quality Checker
Validate and assess data quality:
# Agent that scores a dataset's quality, has an LLM write a report, and
# raises an alert when the score is not above the threshold.
quality_agent = Agent(
    name="data_quality_checker",
    description="Checks data quality and identifies issues",
)

# Quality assessment
# The script is passed to codeexec.execute as text, so its top-level lines
# stay flush-left and the `if` bodies are indented relative to them. The
# f-string interpolates `state.dataset`; doubled braces are literal dict
# braces in the generated script.
quality_step = quality_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import json
# Load data
data = pd.read_csv('{state.dataset}')
# Quality checks
missing_percent = (data.isnull().sum() / len(data) * 100).round(2)
duplicate_count = data.duplicated().sum()
# Start from a perfect score and subtract a penalty per failed check.
quality_score = 100
if missing_percent.max() > 10:
    quality_score -= 30
if duplicate_count > len(data) * 0.05:
    quality_score -= 20
result = {{
    "quality_score": quality_score,
    "missing_data": missing_percent.to_dict(),
    "duplicate_rows": int(duplicate_count),
    "total_rows": len(data),
    "status": "good" if quality_score > 70 else "poor"
}}
print(json.dumps(result))
"""
    )
)

# Generate quality report
report_step = quality_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Generate data quality report: {quality_step.output.result}",
        system_prompt="Provide specific recommendations for data quality improvements",
    ),
    depends_on=quality_step,
)

# Handle poor quality data
# Uses the same cut-off (70) as the script's "good"/"poor" status.
quality_step.on(
    IsSuccess() & ~GreaterThan("quality_score", "70"),
    utils.send_status(
        status="quality_alert",
        message="Data quality is below acceptable threshold",
        priority="high",
    ),
)
Market Research Analyzer
Analyze market research data and trends:
# Agent that extracts structured findings from market research and then
# derives strategic recommendations from them.
market_agent = Agent(
    name="market_research_analyzer",
    description="Analyzes market research data and competitive intelligence",
)

# Research data analysis
# Structured response requested from the first LLM call.
_market_research_schema = {
    "Type": "json_schema",
    "Schema": {
        "type": "object",
        "properties": {
            "market_size": {"type": "string"},
            "growth_rate": {"type": "string"},
            "key_trends": {"type": "array"},
            "competitive_landscape": {"type": "string"},
            "opportunities": {"type": "array"},
        },
    },
}
research_step = market_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze market research data: {state.research_data}",
        response_format=_market_research_schema,
    )
)

# Generate strategic recommendations
_strategy_request = llm.message(
    model="claude-sonnet-4",
    query=f"Generate strategic recommendations based on: {research_step.output.response}",
    system_prompt="You are a strategic business consultant. Provide actionable recommendations.",
)
strategy_step = market_agent.step(_strategy_request, depends_on=research_step)

# Store market insights
_research_succeeded = IsSuccess()
_market_record = {
    "content": research_step.output.response,
    "type": "market_research",
    "tags": ["market", "research", state.industry],
}
research_step.on(_research_succeeded, memory.store(memory=_market_record))
Best Practices
Data Validation
Always validate data before analysis:
# TextContains is not among the imports at the top of this page, so it is
# imported here to keep the snippet self-contained.
from erdo.conditions import TextContains

# Check data quality first
# `agent` is assumed to be an Agent created earlier (not shown in this
# snippet). The script is passed to codeexec.execute as text, so its
# top-level lines stay flush-left and the `if` bodies are indented relative
# to them. The f-string interpolates `state.file_path`; doubled braces are
# literal dict braces in the generated script.
validation_step = agent.step(
    codeexec.execute(
        code=f"""
import json
import pandas as pd
data = pd.read_csv('{state.file_path}')
# Basic validation
issues = []
if data.empty:
    issues.append("Dataset is empty")
if data.isnull().sum().sum() > len(data) * 0.2:
    issues.append("Too much missing data")
result = {{"valid": len(issues) == 0, "issues": issues}}
print(json.dumps(result))
"""
    )
)

# Only proceed if data is valid
validation_step.on(
    IsSuccess() & TextContains("valid", "true"),
    llm.message(model="claude-sonnet-4", query="Proceed with analysis"),
)
Error Handling
Handle analysis errors gracefully:
# When the analysis step errors, register a status notification carrying the
# error message together with a simpler fallback LLM request.
_analysis_failed = IsError()
_failure_notice = utils.send_status(
    status="analysis_failed",
    message=f"Analysis failed: {analysis_step.error.message}",
)
# Try simpler analysis as fallback
_fallback_summary = llm.message(
    model="claude-sonnet-4",
    query="Provide basic summary of the data",
    system_prompt="Focus on simple, observable patterns",
)
analysis_step.on(_analysis_failed, _failure_notice, _fallback_summary)
Result Storage
Store analysis results for future reference:
# On success, persist the analysis response with tags and extra metadata.
_analysis_succeeded = IsSuccess()
_analysis_record = {
    "content": analysis_step.output.response,
    "type": "data_analysis",
    "tags": ["analysis", state.dataset_name, state.analysis_type],
    # The timestamp is a plain (non-f) string, so the double braces are
    # stored literally; presumably a template placeholder -- TODO confirm.
    "extra": {
        "timestamp": "{{system.current_time}}",
        "model_used": "claude-sonnet-4",
    },
}
analysis_step.on(_analysis_succeeded, memory.store(memory=_analysis_record))