Skip to main content

Data Analysis Examples

Examples of building data analysis agents that process, analyze, and generate insights from various data sources.

Sales Data Analyzer

Analyze sales performance and generate business insights:
from erdo import Agent, state
from erdo.actions import llm, codeexec, memory, utils
from erdo.conditions import IsSuccess, GreaterThan, IsError

# Agent that turns raw sales records into business insights
sales_analyzer = Agent(
    name="sales_data_analyzer",
    description="Analyzes sales data and generates business insights",
)

# Step 1: Load and validate data
# The script is an f-string: `{state.data_source}` is formatted into the
# script text when this statement runs, while doubled braces become literal
# braces in the generated script.
sales_preprocess_script = f"""
import pandas as pd
import json

# Load sales data
data = pd.read_csv('{state.data_source}')

# Basic validation
missing_data = data.isnull().sum()
total_revenue = data['amount'].sum()
unique_customers = data['customer_id'].nunique()

result = {{
    "total_revenue": float(total_revenue),
    "unique_customers": int(unique_customers),
    "total_transactions": len(data),
    "data_quality": "good" if missing_data.sum() < len(data) * 0.1 else "poor"
}}

print(json.dumps(result))
"""
preprocess_step = sales_analyzer.step(
    codeexec.execute(code=sales_preprocess_script)
)

# Step 2: Generate insights
# JSON schema for the structured answer requested from the model
sales_insight_schema = {
    "type": "object",
    "properties": {
        "insights": {"type": "array"},
        "recommendations": {"type": "array"},
        "confidence": {"type": "number"},
    },
}
analysis_step = sales_analyzer.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze this sales data: {preprocess_step.output.result}",
        response_format={
            "Type": "json_schema",
            "Schema": sales_insight_schema,
        },
    ),
    depends_on=preprocess_step,
)

# Store high-confidence results
confident_success = IsSuccess() & GreaterThan("confidence", "0.8")
validated_record = {
    "content": analysis_step.output.response,
    "type": "sales_analysis",
    "tags": ["sales", "validated"],
}
analysis_step.on(confident_success, memory.store(memory=validated_record))

# Flag low confidence for review
# Successful runs whose confidence is NOT above 0.8 take this branch instead
unconfident_success = IsSuccess() & ~GreaterThan("confidence", "0.8")
review_notice = utils.send_status(
    status="review_needed",
    message="Analysis has low confidence - needs review",
)
analysis_step.on(unconfident_success, review_notice)

Customer Segmentation

Segment customers based on behavior and value:
# Agent that buckets customers into spend-based segments and asks an LLM
# for insights on each segment.
segmentation_agent = Agent(
    name="customer_segmentation",
    description="Segments customers based on purchase behavior"
)

# Analyze customer data
# The script is an f-string: `{state.customer_data}` is formatted into the
# script text, while doubled braces become literal braces in the script.
segment_step = segmentation_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import json

# Load customer data
data = pd.read_csv('{state.customer_data}')

# Calculate customer metrics
customer_metrics = data.groupby('customer_id').agg({{
    'amount': ['sum', 'mean', 'count'],
    'date': ['min', 'max']
}}).round(2)

# Simple segmentation
customer_metrics['total_spent'] = customer_metrics[('amount', 'sum')]
# include_lowest=True keeps customers whose total is exactly 0 in 'Low';
# with the default they fall outside every bin and are dropped from the counts
customer_metrics['segment'] = pd.cut(
    customer_metrics['total_spent'],
    bins=[0, 1000, 5000, float('inf')],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True
)

segment_counts = customer_metrics['segment'].value_counts().to_dict()

result = {{
    "segments": segment_counts,
    "total_customers": len(customer_metrics),
    "avg_customer_value": float(customer_metrics['total_spent'].mean())
}}

print(json.dumps(result))
"""
    )
)

# Generate segment insights
# Runs after segment_step and feeds its result into the prompt
insights_step = segmentation_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze customer segments: {segment_step.output.result}",
        system_prompt="Provide actionable insights for each customer segment"
    ),
    depends_on=segment_step
)

Financial Trend Analysis

Analyze financial trends and generate forecasts:
# Agent that computes monthly revenue trends and asks an LLM for a
# structured financial summary.
financial_agent = Agent(
    name="financial_trend_analyzer",
    description="Analyzes financial trends and generates forecasts"
)

# Calculate trends
# The script is an f-string: `{state.financial_data}` is formatted into the
# script text, while doubled braces become literal braces in the script.
trend_step = financial_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import numpy as np
import json

# Load financial data
data = pd.read_csv('{state.financial_data}')
data['date'] = pd.to_datetime(data['date'])

# Monthly aggregation
monthly = data.groupby(data['date'].dt.to_period('M'))['revenue'].sum()

# Calculate growth rate
growth_rate = monthly.pct_change().mean()

# Simple trend detection over the last three months.
# A series that neither rises nor falls consistently is reported as "mixed"
# rather than being mislabelled as "decreasing".
recent_months = monthly.tail(3)
if recent_months.is_monotonic_increasing:
    trend = "increasing"
elif recent_months.is_monotonic_decreasing:
    trend = "decreasing"
else:
    trend = "mixed"

result = {{
    "monthly_growth_rate": float(growth_rate),
    "trend": trend,
    "latest_revenue": float(monthly.iloc[-1]),
    "months_analyzed": len(monthly)
}}

print(json.dumps(result))
"""
    )
)

# Generate financial insights
# Requests a JSON object with summary, forecast, risks and opportunities
financial_insights = financial_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze financial trends: {trend_step.output.result}",
        response_format={
            "Type": "json_schema",
            "Schema": {
                "type": "object",
                "properties": {
                    "summary": {"type": "string"},
                    "forecast": {"type": "string"},
                    "risks": {"type": "array"},
                    "opportunities": {"type": "array"}
                }
            }
        }
    ),
    depends_on=trend_step
)

# Store analysis results
# Tagged with state.period alongside the fixed tags
financial_insights.on(
    IsSuccess(),
    memory.store(memory={
        "content": financial_insights.output.response,
        "type": "financial_analysis",
        "tags": ["finance", "trends", state.period]
    })
)

Data Quality Checker

Validate and assess data quality:
# Agent that scores a dataset's quality, reports on it, and raises an
# alert when the score is not above the acceptable threshold.
quality_agent = Agent(
    name="data_quality_checker",
    description="Checks data quality and identifies issues"
)

# Quality assessment
# The script is an f-string: `{state.dataset}` is formatted into the script
# text, while doubled braces become literal braces in the script.
quality_step = quality_agent.step(
    codeexec.execute(
        code=f"""
import pandas as pd
import json

# Load data
data = pd.read_csv('{state.dataset}')
total_rows = len(data)

if total_rows == 0:
    # No rows to assess: the percentages would be 0/0 (NaN), which would
    # otherwise yield a misleading score of 100 and NaN values in the JSON
    missing_percent = pd.Series(0.0, index=data.columns)
    duplicate_count = 0
    quality_score = 0
else:
    # Quality checks
    missing_percent = (data.isnull().sum() / total_rows * 100).round(2)
    duplicate_count = data.duplicated().sum()

    # Start from a perfect score and subtract a penalty per failed check
    quality_score = 100
    if missing_percent.max() > 10:
        quality_score -= 30
    if duplicate_count > total_rows * 0.05:
        quality_score -= 20

result = {{
    "quality_score": quality_score,
    "missing_data": missing_percent.to_dict(),
    "duplicate_rows": int(duplicate_count),
    "total_rows": total_rows,
    "status": "good" if quality_score > 70 else "poor"
}}

print(json.dumps(result))
"""
    )
)

# Generate quality report
report_step = quality_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Generate data quality report: {quality_step.output.result}",
        system_prompt="Provide specific recommendations for data quality improvements"
    ),
    depends_on=quality_step
)

# Handle poor quality data
# Same cut-off as the script's "status" field: a score that is not above 70
quality_step.on(
    IsSuccess() & ~GreaterThan("quality_score", "70"),
    utils.send_status(
        status="quality_alert",
        message="Data quality is below acceptable threshold",
        priority="high"
    )
)

Market Research Analyzer

Analyze market research data and trends:
# Agent for market research and competitive intelligence
market_agent = Agent(
    name="market_research_analyzer",
    description="Analyzes market research data and competitive intelligence",
)

# Research data analysis
# JSON schema for the structured answer requested from the model
market_research_schema = {
    "type": "object",
    "properties": {
        "market_size": {"type": "string"},
        "growth_rate": {"type": "string"},
        "key_trends": {"type": "array"},
        "competitive_landscape": {"type": "string"},
        "opportunities": {"type": "array"},
    },
}
research_step = market_agent.step(
    llm.message(
        model="claude-sonnet-4",
        query=f"Analyze market research data: {state.research_data}",
        response_format={
            "Type": "json_schema",
            "Schema": market_research_schema,
        },
    )
)

# Generate strategic recommendations
# Builds on the research step's response and is declared dependent on it
strategy_request = llm.message(
    model="claude-sonnet-4",
    query=f"Generate strategic recommendations based on: {research_step.output.response}",
    system_prompt="You are a strategic business consultant. Provide actionable recommendations.",
)
strategy_step = market_agent.step(strategy_request, depends_on=research_step)

# Store market insights
research_succeeded = IsSuccess()
market_record = {
    "content": research_step.output.response,
    "type": "market_research",
    "tags": ["market", "research", state.industry],
}
research_step.on(research_succeeded, memory.store(memory=market_record))

Best Practices

Data Validation

Always validate data before analysis:
# TextContains is needed by the gating condition at the bottom of this
# example. NOTE(review): assumed to live in erdo.conditions alongside the
# other conditions - confirm against the SDK.
from erdo.conditions import IsSuccess, TextContains

# Check data quality first
# `agent` stands for whichever Agent instance this validation belongs to.
# The script is an f-string: `{state.file_path}` is formatted into the script
# text, while doubled braces become literal braces in the script.
validation_step = agent.step(
    codeexec.execute(
        code=f"""
import json

import pandas as pd

data = pd.read_csv('{state.file_path}')

# Basic validation
issues = []
if data.empty:
    issues.append("Dataset is empty")
if data.isnull().sum().sum() > len(data) * 0.2:
    issues.append("Too much missing data")

# json must be imported above: the result is emitted as a JSON document
result = {{"valid": len(issues) == 0, "issues": issues}}
print(json.dumps(result))
"""
    )
)

# Only proceed if data is valid
validation_step.on(
    IsSuccess() & TextContains("valid", "true"),
    llm.message(model="claude-sonnet-4", query="Proceed with analysis")
)

Error Handling

Handle analysis errors gracefully:
# Register two actions for a failed analysis: a status notice carrying the
# error message, and a simpler LLM request.
analysis_failed = IsError()
failure_notice = utils.send_status(
    status="analysis_failed",
    message=f"Analysis failed: {analysis_step.error.message}",
)
# Try simpler analysis as fallback
basic_summary_request = llm.message(
    model="claude-sonnet-4",
    query="Provide basic summary of the data",
    system_prompt="Focus on simple, observable patterns",
)
analysis_step.on(analysis_failed, failure_notice, basic_summary_request)

Result Storage

Store analysis results for future reference:
# On success, persist the analysis response together with descriptive tags
# and extra metadata.
analysis_succeeded = IsSuccess()
analysis_record = {
    "content": analysis_step.output.response,
    "type": "data_analysis",
    "tags": ["analysis", state.dataset_name, state.analysis_type],
    "extra": {
        # Plain (non-f) string, so the doubled braces are passed through
        # verbatim - presumably resolved later by the platform; confirm.
        "timestamp": "{{system.current_time}}",
        "model_used": "claude-sonnet-4",
    },
}
analysis_step.on(analysis_succeeded, memory.store(memory=analysis_record))