Claude e44e45bfc5
Add Nairobi Information Collector application
Comprehensive intelligence retrieval system for collecting and aggregating
information about Nairobi, Kenya from multiple sources.

Features:
- Multi-source data collection (news, social media, government, tourism, business)
- RESTful API with FastAPI
- Automated scheduling for continuous data collection
- Intelligence brief generation
- Real-time trending topics tracking
- Alert system for important updates
- Web scraping with rate limiting and caching
- Social media integration (Twitter, Instagram)
- NLP-powered categorization and processing
- Docker support for easy deployment
- CLI for manual operations

Components:
- Data models with SQLAlchemy
- Base collector class with extensible architecture
- Source-specific collectors (news, social, government, tourism, business)
- Data processor for brief generation
- Scheduler for automated collection
- Comprehensive API endpoints
- CLI interface for manual control

Documentation:
- Complete README with setup instructions
- Quick start guide
- Example usage scripts
- Docker Compose configuration
- Environment configuration templates
2025-11-21 02:06:23 +00:00

327 lines
8.5 KiB
Python

"""
API routes for Nairobi Information Collector
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import List, Optional
from datetime import datetime, timedelta
from app.database import get_db
from app.models.data_models import (
InformationItem, InformationBrief, Alert, TrendingTopic,
InformationItemSchema, InformationBriefSchema, AlertSchema,
TrendingTopicSchema, SearchQuery, CollectionStats,
CategoryType
)
from app.processors.data_processor import DataProcessor
# Single versioned API router; the FastAPI application mounts it elsewhere.
router = APIRouter(prefix="/api/v1", tags=["api"])
@router.get("/")
async def root():
    """Describe the API and enumerate its primary endpoints."""
    endpoints = {
        "brief": "/api/v1/brief/latest",
        "info": "/api/v1/info/{category}",
        "search": "/api/v1/search",
        "trending": "/api/v1/trending",
        "alerts": "/api/v1/alerts",
        "stats": "/api/v1/stats",
    }
    return {
        "name": "Nairobi Information Collector API",
        "version": "1.0.0",
        "endpoints": endpoints,
    }
@router.get("/brief/latest", response_model=InformationBriefSchema)
async def get_latest_brief(db: Session = Depends(get_db)):
    """Return the most recent intelligence brief.

    If no brief exists yet, one is generated on demand from the
    currently collected data.

    Returns:
        The most recent (or freshly generated) intelligence brief.
    """
    latest = (
        db.query(InformationBrief)
        .order_by(InformationBrief.generated_at.desc())
        .first()
    )
    if latest is None:
        # First-ever request: build a brief on the fly.
        latest = DataProcessor(db).generate_brief()
    return latest
@router.get("/brief/generate", response_model=InformationBriefSchema)
async def generate_new_brief(
    hours: int = Query(24, ge=1, le=168),
    db: Session = Depends(get_db)
):
    """Generate and return a fresh intelligence brief.

    Args:
        hours: Look-back window in hours (1..168, default 24).

    Returns:
        The newly generated brief.
    """
    return DataProcessor(db).generate_brief(hours=hours)
@router.get("/info/{category}", response_model=List[InformationItemSchema])
async def get_info_by_category(
    category: CategoryType,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    hours: int = Query(24, ge=1, le=168),
    db: Session = Depends(get_db)
):
    """List recently collected items for one category, newest first.

    Args:
        category: Category to filter on (news, events, economy, etc.).
        limit: Page size (1..500).
        offset: Items to skip, for pagination.
        hours: Look-back window in hours (1..168).

    Returns:
        Matching information items ordered by collection time, descending.
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)
    return (
        db.query(InformationItem)
        .filter(
            InformationItem.category == category,
            InformationItem.collected_at >= cutoff,
        )
        .order_by(InformationItem.collected_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
@router.get("/info/all", response_model=List[InformationItemSchema])
async def get_all_info(
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    hours: int = Query(24, ge=1, le=168),
    min_reliability: Optional[float] = Query(None, ge=0, le=1),
    db: Session = Depends(get_db)
):
    """List recently collected items across all categories, newest first.

    NOTE(review): this route is registered *after* "/info/{category}", so
    FastAPI matches /info/all against the dynamic route first and rejects
    it unless "all" is a valid CategoryType. Register this route before
    the dynamic one to make it reachable.

    Args:
        limit: Page size (1..500).
        offset: Items to skip, for pagination.
        hours: Look-back window in hours (1..168).
        min_reliability: Accepted but currently a no-op (see TODO below).

    Returns:
        Information items ordered by collection time, descending.
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)
    query = db.query(InformationItem).filter(
        InformationItem.collected_at >= cutoff
    )
    if min_reliability is not None:
        # TODO: reliability filtering is unimplemented — the reliability
        # score mapping is not available on the model here, so this
        # parameter is silently ignored for now.
        pass
    return (
        query.order_by(InformationItem.collected_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
@router.get("/search", response_model=List[InformationItemSchema])
async def search_info(
    q: str = Query(..., min_length=1),
    category: Optional[CategoryType] = None,
    from_date: Optional[datetime] = None,
    to_date: Optional[datetime] = None,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
    db: Session = Depends(get_db)
):
    """Search item titles and summaries with a case-insensitive substring.

    NOTE(review): `q` is interpolated into an ILIKE pattern, so "%" and
    "_" in the query act as SQL wildcards — escape them if literal
    matching is required.

    Args:
        q: Substring to match in title or summary.
        category: Optional category restriction.
        from_date: Only items collected at or after this instant.
        to_date: Only items collected at or before this instant.
        limit: Page size (1..500).
        offset: Results to skip, for pagination.

    Returns:
        Matching items ordered by collection time, descending.
    """
    pattern = f"%{q}%"
    query = db.query(InformationItem).filter(
        InformationItem.title.ilike(pattern)
        | InformationItem.summary.ilike(pattern)
    )
    if category:
        query = query.filter(InformationItem.category == category)
    if from_date:
        query = query.filter(InformationItem.collected_at >= from_date)
    if to_date:
        query = query.filter(InformationItem.collected_at <= to_date)
    return (
        query.order_by(InformationItem.collected_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
@router.get("/trending", response_model=List[TrendingTopicSchema])
async def get_trending(
    platform: Optional[str] = None,
    limit: int = Query(10, ge=1, le=50),
    hours: int = Query(24, ge=1, le=168),
    db: Session = Depends(get_db)
):
    """Return the most-mentioned trending topics from a recent window.

    Args:
        platform: Optional platform filter (twitter, instagram, etc.).
        limit: Maximum topics to return (1..50).
        hours: Look-back window in hours, applied to last_updated.

    Returns:
        Topics ordered by mention count, descending.
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)
    query = db.query(TrendingTopic).filter(TrendingTopic.last_updated >= cutoff)
    if platform:
        query = query.filter(TrendingTopic.platform == platform)
    return query.order_by(TrendingTopic.mention_count.desc()).limit(limit).all()
@router.get("/alerts", response_model=List[AlertSchema])
async def get_alerts(
    alert_type: Optional[str] = None,
    severity: Optional[str] = None,
    active_only: bool = True,
    db: Session = Depends(get_db)
):
    """List alerts, newest first, with optional filters.

    Args:
        alert_type: Restrict to one alert type (traffic, weather, etc.).
        severity: Restrict to one severity (low, medium, high, critical).
        active_only: When true (default), exclude deactivated alerts.

    Returns:
        Alerts ordered by creation time, descending.
    """
    # Accumulate SQLAlchemy filter expressions, then apply them in one call.
    conditions = []
    if active_only:
        conditions.append(Alert.is_active == True)  # noqa: E712 — SQLAlchemy expression
    if alert_type:
        conditions.append(Alert.alert_type == alert_type)
    if severity:
        conditions.append(Alert.severity == severity)
    return (
        db.query(Alert)
        .filter(*conditions)
        .order_by(Alert.created_at.desc())
        .all()
    )
@router.get("/stats", response_model=CollectionStats)
async def get_stats(db: Session = Depends(get_db)):
    """
    Get collection statistics.

    Computes the total item count, per-category and per-source breakdowns,
    the timestamp of the most recent collection, the number of active
    alerts, and the number of tracked trending topics.

    Fix: the per-category breakdown previously issued one COUNT query per
    CategoryType (N+1); it now uses a single GROUP BY query, matching the
    existing per-source aggregation.

    Returns:
        CollectionStats populated from the current database contents.
    """
    from sqlalchemy import func  # function-scope import, as in the original

    total_items = db.query(InformationItem).count()

    # One grouped query for all categories instead of one COUNT each.
    category_rows = db.query(
        InformationItem.category,
        func.count(InformationItem.id)
    ).group_by(InformationItem.category).all()
    # Every category appears in the response, even with zero items.
    items_by_category = {category.value: 0 for category in CategoryType}
    for cat, count in category_rows:
        # The column may yield CategoryType members or raw string values
        # depending on how the Enum column is declared — handle both.
        key = cat.value if isinstance(cat, CategoryType) else cat
        items_by_category[key] = count

    # Items by source (single grouped query, as before).
    source_rows = db.query(
        InformationItem.source_name,
        func.count(InformationItem.id)
    ).group_by(InformationItem.source_name).all()
    items_by_source = dict(source_rows)

    # Timestamp of the most recently collected item, if any exist.
    latest_item = db.query(InformationItem).order_by(
        InformationItem.collected_at.desc()
    ).first()
    latest_collection = latest_item.collected_at if latest_item else None

    active_alerts = db.query(Alert).filter(Alert.is_active == True).count()  # noqa: E712
    trending_count = db.query(TrendingTopic).count()

    return CollectionStats(
        total_items=total_items,
        items_by_category=items_by_category,
        items_by_source=items_by_source,
        latest_collection=latest_collection,
        active_alerts=active_alerts,
        trending_topics_count=trending_count
    )
@router.get("/health")
async def health_check():
    """Liveness probe: report service status and the current UTC time."""
    now = datetime.utcnow()
    return {"status": "healthy", "timestamp": now.isoformat()}