Merge e44e45bfc547895415af0ffe43ce429b698497e8 into 9b4e9788e4a3a731f7567338ed15d3ec549ce03b

EduRills 2025-11-20 20:11:06 -06:00 committed by GitHub
commit 97beb70fb2
31 changed files with 4508 additions and 0 deletions

nairobi-info-collector/.env.example
@@ -0,0 +1,88 @@
# Application Settings
APP_NAME="Nairobi Information Collector"
APP_VERSION="1.0.0"
DEBUG=True
ENVIRONMENT=development
# Server Configuration
HOST=0.0.0.0
PORT=8000
# Database Configuration
DATABASE_URL=postgresql://nairobiuser:password@localhost:5432/nairobi_info
# For SQLite (development): sqlite:///./nairobi_info.db
# Redis Configuration
REDIS_URL=redis://localhost:6379/0
REDIS_PASSWORD=
# API Keys - News Sources
NEWS_API_KEY=your_news_api_key_here
# API Keys - Social Media
TWITTER_API_KEY=your_twitter_api_key
TWITTER_API_SECRET=your_twitter_api_secret
TWITTER_ACCESS_TOKEN=your_twitter_access_token
TWITTER_ACCESS_SECRET=your_twitter_access_secret
TWITTER_BEARER_TOKEN=your_twitter_bearer_token
INSTAGRAM_USERNAME=your_instagram_username
INSTAGRAM_PASSWORD=your_instagram_password
# API Keys - Maps & Location
GOOGLE_MAPS_API_KEY=your_google_maps_api_key
FOURSQUARE_API_KEY=your_foursquare_api_key
# API Keys - NLP & AI
OPENAI_API_KEY=your_openai_api_key
ANTHROPIC_API_KEY=your_anthropic_api_key
# Collection Settings
COLLECTION_INTERVAL_SECONDS=300
MAX_ITEMS_PER_SOURCE=100
REQUEST_TIMEOUT_SECONDS=30
MAX_RETRIES=3
# Rate Limiting
RATE_LIMIT_REQUESTS_PER_MINUTE=60
RATE_LIMIT_REQUESTS_PER_HOUR=1000
# Scraping Settings
USER_AGENT="Mozilla/5.0 (compatible; NairobiInfoBot/1.0)"
RESPECT_ROBOTS_TXT=True
ENABLE_CACHING=True
CACHE_TTL_SECONDS=3600
# Data Processing
ENABLE_NLP_PROCESSING=True
ENABLE_SENTIMENT_ANALYSIS=True
ENABLE_AUTO_CATEGORIZATION=True
MIN_RELIABILITY_SCORE=0.5
# Logging
LOG_LEVEL=INFO
LOG_FILE=logs/nairobi_collector.log
# Security
SECRET_KEY=your-secret-key-change-this-in-production
API_KEY_HEADER=X-API-Key
ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000
# Monitoring
SENTRY_DSN=
ENABLE_METRICS=True
METRICS_PORT=9090
# Feature Flags
ENABLE_SOCIAL_MEDIA_COLLECTION=True
ENABLE_NEWS_COLLECTION=True
ENABLE_GOVERNMENT_COLLECTION=True
ENABLE_TOURISM_COLLECTION=True
ENABLE_BUSINESS_COLLECTION=True
# Email Notifications (for alerts)
SMTP_HOST=smtp.gmail.com
SMTP_PORT=587
SMTP_USERNAME=your_email@gmail.com
SMTP_PASSWORD=your_app_password
ALERT_EMAIL_RECIPIENTS=alerts@example.com

nairobi-info-collector/.gitignore
@@ -0,0 +1,65 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual Environment
venv/
env/
ENV/
.venv
# Environment variables
.env
.env.local
.env.*.local
# Database
*.db
*.sqlite
*.sqlite3
# Logs
logs/
*.log
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
# Jupyter
.ipynb_checkpoints
# Docker
*.pid
.dockerignore
# OS
Thumbs.db

nairobi-info-collector/Dockerfile
@@ -0,0 +1,38 @@
# Dockerfile for Nairobi Information Collector
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
postgresql-client \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Download spaCy model (for NLP)
RUN python -m spacy download en_core_web_sm
# Copy application code
COPY . .
# Create logs directory
RUN mkdir -p logs
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/api/v1/health || exit 1
# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

nairobi-info-collector/LICENSE
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Nairobi Information Collector
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,236 @@
# Quick Start Guide
Get the Nairobi Information Collector up and running in minutes!
## Prerequisites
- Python 3.9+ or Docker
- PostgreSQL (optional, SQLite works for development)
- API keys for various services (optional but recommended)
## Installation
### Option 1: Using Docker (Recommended)
```bash
# Clone the repository
git clone <repository-url>
cd nairobi-info-collector
# Copy environment file
cp .env.example .env
# Edit .env with your API keys
nano .env
# Start with Docker Compose
docker-compose up -d
# Check logs
docker-compose logs -f app
```
The API will be available at `http://localhost:8000`
### Option 2: Local Installation
```bash
# Clone the repository
git clone <repository-url>
cd nairobi-info-collector
# Create virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install dependencies
pip install -r requirements.txt
# Download NLP model
python -m spacy download en_core_web_sm
# Copy and configure environment
cp .env.example .env
nano .env
# Initialize database
python cli.py init-db
# Run the application
python -m app.main
```
## Configuration
### Required API Keys
Edit `.env` and add your API keys:
```env
# Social Media (optional but recommended)
TWITTER_BEARER_TOKEN=your_twitter_bearer_token
GOOGLE_MAPS_API_KEY=your_google_maps_key
# NLP Processing (optional)
OPENAI_API_KEY=your_openai_key
# Database (for production)
DATABASE_URL=postgresql://user:password@localhost:5432/nairobi_info
```
### Free Tier Options
You can start without API keys:
- News collection works without keys (web scraping)
- Government data works without keys
- Social media requires API keys
## Usage
### Web API
1. **Access the API documentation:**
- Open `http://localhost:8000/docs` in your browser
- Interactive Swagger UI with all endpoints
2. **Get the latest brief:**
```bash
curl http://localhost:8000/api/v1/brief/latest
```
3. **Search for information:**
```bash
curl "http://localhost:8000/api/v1/search?q=restaurant&category=food"
```
4. **Get trending topics:**
```bash
curl http://localhost:8000/api/v1/trending
```
### Command Line Interface
```bash
# Collect news
python cli.py collect news
# Collect from all sources
python cli.py collect all
# Generate a brief
python cli.py brief --hours 24 --output brief.md
# Collect social media (requires API keys)
python cli.py collect social --platform twitter
```
## Testing
### Manual Collection Test
```bash
# Test news collection
python cli.py collect news
# Check the database
python -c "from app.database import SessionLocal; from app.models.data_models import InformationItem; db = SessionLocal(); print(f'Items collected: {db.query(InformationItem).count()}')"
```
### Generate a Brief
```bash
# Generate and save brief
python cli.py brief --output my_brief.md
# View the brief
cat my_brief.md
```
## Accessing the Data
### Via API
```python
import requests
# Get latest brief
response = requests.get("http://localhost:8000/api/v1/brief/latest")
brief = response.json()
# Search
response = requests.get(
"http://localhost:8000/api/v1/search",
params={"q": "nairobi", "limit": 10}
)
results = response.json()
```
### Via Database
```python
from app.database import SessionLocal
from app.models.data_models import InformationItem
db = SessionLocal()
items = db.query(InformationItem).limit(10).all()
for item in items:
print(f"{item.title} - {item.category}")
```
## Automation
The application automatically:
- Collects data every 5 minutes (configurable)
- Generates briefs every 6 hours
- Updates trending topics in real-time
To change collection frequency:
```env
# In .env
COLLECTION_INTERVAL_SECONDS=300 # 5 minutes
```
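Under the hood, the scheduler simply reads this value from the application settings. The sketch below shows one way that wiring could look; it assumes APScheduler and is illustrative only, not the project's actual `app/scheduler` code:
```python
# Illustrative only: periodic collection driven by COLLECTION_INTERVAL_SECONDS.
# Assumes APScheduler is installed; the real scheduler module may differ.
from apscheduler.schedulers.background import BackgroundScheduler

from app.collectors import NewsCollector
from app.config import get_settings
from app.database import SessionLocal


def collect_news() -> None:
    """Run one news collection pass with its own database session."""
    db = SessionLocal()
    try:
        NewsCollector(db).run()
    finally:
        db.close()


settings = get_settings()
scheduler = BackgroundScheduler()
scheduler.add_job(collect_news, "interval", seconds=settings.collection_interval_seconds)
scheduler.start()
```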
## Troubleshooting
### Database connection errors
```bash
# Check PostgreSQL is running
docker-compose ps
# Reset database
docker-compose down -v
docker-compose up -d
```
### No data being collected
1. Check logs: `docker-compose logs -f app`
2. Verify network connectivity
3. Check API keys in `.env`
4. Try manual collection: `python cli.py collect news`
### Import errors
```bash
# Reinstall dependencies
pip install -r requirements.txt --force-reinstall
```
## Next Steps
1. **Add API Keys:** Configure Twitter, Google Maps, etc. for more data sources
2. **Customize Sources:** Edit `app/config.py` to add/remove sources
3. **Set Up Monitoring:** Configure Sentry for error tracking
4. **Deploy to Production:** Use Docker Compose with proper environment variables
## API Documentation
Full API documentation available at:
- Swagger UI: `http://localhost:8000/docs`
- ReDoc: `http://localhost:8000/redoc`
## Support
For issues and questions:
- Check logs: `tail -f logs/app.log`
- View API health: `http://localhost:8000/api/v1/health`
- See stats: `http://localhost:8000/api/v1/stats`

nairobi-info-collector/README.md
@@ -0,0 +1,213 @@
# Nairobi Information Collector
An advanced intelligence retrieval system designed to collect, verify, and synthesize comprehensive information about Nairobi, Kenya from multiple reliable digital sources.
## Features
- **Multi-Source Data Collection**: Gathers information from news sites, social media, government portals, tourism platforms, and business sources
- **Real-Time Updates**: Continuously collects and updates information
- **Structured Data**: Organizes information into categories (News, Events, Culture, Economy, etc.)
- **RESTful API**: Easy-to-use API endpoints for accessing collected data
- **Automated Scheduling**: Runs collectors at scheduled intervals
- **Data Verification**: Tracks sources and reliability levels
- **Categorization**: Automatically categorizes information by type
## Architecture
```
nairobi-info-collector/
├── app/
│ ├── main.py # FastAPI application entry point
│ ├── config.py # Configuration management
│ ├── models/ # Data models
│ ├── collectors/ # Source-specific data collectors
│ ├── processors/ # Data processing and NLP
│ ├── api/ # API endpoints
│ ├── database/ # Database connection and setup
│ └── scheduler/ # Task scheduling
├── requirements.txt # Python dependencies
├── .env # Environment variables
└── docker-compose.yml # Docker setup
```
## Installation
### Prerequisites
- Python 3.9+
- PostgreSQL (or SQLite for development)
- Redis (for caching and task queue)
### Setup
1. Clone the repository:
```bash
git clone <repository-url>
cd nairobi-info-collector
```
2. Create a virtual environment:
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install dependencies:
```bash
pip install -r requirements.txt
```
4. Configure environment variables:
```bash
cp .env.example .env
# Edit .env with your configuration
```
5. Initialize the database:
```bash
python -m app.database.db init
```
6. Run the application:
```bash
uvicorn app.main:app --reload
```
### Using Docker
```bash
docker-compose up -d
```
## API Endpoints
### Get Latest Brief
```
GET /api/v1/brief/latest
```
Returns the most recent intelligence brief.
### Get Information by Category
```
GET /api/v1/info/{category}
```
Categories: `news`, `events`, `culture`, `economy`, `food`, `social`, `travel`, `places`, `community`
### Search Information
```
GET /api/v1/search?q={query}&category={category}&from={date}&to={date}
```
### Get Trending Topics
```
GET /api/v1/trending
```
### Get Real-Time Alerts
```
GET /api/v1/alerts
```
## Data Sources
### News & Media
- Nation Africa
- Standard Media
- Citizen Digital
- BBC Africa
- Business Daily Africa
### Government & Public
- Nairobi City County
- Kenya Open Data Portal
- NTSA, KCAA, KNBS
### Tourism
- TripAdvisor
- Google Maps
- Airbnb Experiences
### Social Media
- Twitter/X (via API)
- Instagram (via unofficial APIs)
- TikTok trending
- YouTube
### Business
- TechCabal
- StartUp Kenya
- LinkedIn insights
## Configuration
Edit `.env` file to configure:
```env
# Database
DATABASE_URL=postgresql://user:password@localhost:5432/nairobi_info
# API Keys
TWITTER_API_KEY=your_key
GOOGLE_MAPS_API_KEY=your_key
OPENAI_API_KEY=your_key # For NLP processing
# Collection Settings
COLLECTION_INTERVAL_SECONDS=300  # seconds
MAX_ITEMS_PER_SOURCE=100
# Cache
REDIS_URL=redis://localhost:6379
```
## Usage Examples
### Python Client
```python
import requests
# Get latest brief
response = requests.get("http://localhost:8000/api/v1/brief/latest")
brief = response.json()
# Search for specific information
response = requests.get(
"http://localhost:8000/api/v1/search",
params={"q": "restaurant opening", "category": "food"}
)
results = response.json()
```
### CLI
```bash
# Trigger manual collection
python -m app.collectors.run --source news
# Generate brief
python -m app.processors.generate_brief
```
## Contributing
1. Fork the repository
2. Create a feature branch
3. Commit your changes
4. Push to the branch
5. Create a Pull Request
## Ethical Considerations
- Respects robots.txt (see the sketch below)
- Implements rate limiting
- Uses official APIs where available
- Caches responses to minimize requests
- Only collects publicly available information
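The `RESPECT_ROBOTS_TXT` flag in `.env` controls the robots.txt behaviour. A minimal sketch of how a collector could honour it with the standard library (illustrative, not necessarily the project's exact implementation):
```python
# Illustrative robots.txt gate using the standard library; controlled by
# RESPECT_ROBOTS_TXT and USER_AGENT in .env. The collectors' real check may differ.
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

from app.config import get_settings

settings = get_settings()


def is_allowed(url: str) -> bool:
    """Return True if robots.txt permits our user agent to fetch the URL."""
    if not settings.respect_robots_txt:
        return True
    parsed = urlparse(url)
    parser = RobotFileParser()
    parser.set_url(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    parser.read()
    return parser.can_fetch(settings.user_agent, url)
```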
## License
MIT License
## Support
For issues and questions, please open a GitHub issue.

nairobi-info-collector/app/__init__.py
@@ -0,0 +1,7 @@
"""
Nairobi Information Collector
Advanced Intelligence Retrieval System
"""
__version__ = "1.0.0"
__author__ = "Nairobi Info Collector Team"

nairobi-info-collector/app/api/__init__.py
@@ -0,0 +1,6 @@
"""
API routes and endpoints
"""
from .routes import router
__all__ = ["router"]

nairobi-info-collector/app/api/routes.py
@@ -0,0 +1,326 @@
"""
API routes for Nairobi Information Collector
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from typing import List, Optional
from datetime import datetime, timedelta
from app.database import get_db
from app.models.data_models import (
InformationItem, InformationBrief, Alert, TrendingTopic,
InformationItemSchema, InformationBriefSchema, AlertSchema,
TrendingTopicSchema, SearchQuery, CollectionStats,
CategoryType
)
from app.processors.data_processor import DataProcessor
router = APIRouter(prefix="/api/v1", tags=["api"])
@router.get("/")
async def root():
"""API root endpoint"""
return {
"name": "Nairobi Information Collector API",
"version": "1.0.0",
"endpoints": {
"brief": "/api/v1/brief/latest",
"info": "/api/v1/info/{category}",
"search": "/api/v1/search",
"trending": "/api/v1/trending",
"alerts": "/api/v1/alerts",
"stats": "/api/v1/stats"
}
}
@router.get("/brief/latest", response_model=InformationBriefSchema)
async def get_latest_brief(db: Session = Depends(get_db)):
"""
Get the latest intelligence brief
Returns:
The most recent intelligence brief
"""
brief = db.query(InformationBrief).order_by(
InformationBrief.generated_at.desc()
).first()
if not brief:
# Generate a new brief if none exists
processor = DataProcessor(db)
brief = processor.generate_brief()
return brief
@router.get("/brief/generate", response_model=InformationBriefSchema)
async def generate_new_brief(
hours: int = Query(24, ge=1, le=168),
db: Session = Depends(get_db)
):
"""
Generate a new intelligence brief
Args:
hours: Number of hours to include in the brief (default: 24)
Returns:
Newly generated brief
"""
processor = DataProcessor(db)
brief = processor.generate_brief(hours=hours)
return brief
@router.get("/info/{category}", response_model=List[InformationItemSchema])
async def get_info_by_category(
category: CategoryType,
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
hours: int = Query(24, ge=1, le=168),
db: Session = Depends(get_db)
):
"""
Get information items by category
Args:
category: Category type (news, events, economy, etc.)
limit: Maximum number of items to return
offset: Number of items to skip
hours: Look back this many hours (default: 24)
Returns:
List of information items
"""
since = datetime.utcnow() - timedelta(hours=hours)
query = db.query(InformationItem).filter(
InformationItem.category == category,
InformationItem.collected_at >= since
)
items = query.order_by(
InformationItem.collected_at.desc()
).offset(offset).limit(limit).all()
return items
@router.get("/info/all", response_model=List[InformationItemSchema])
async def get_all_info(
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
hours: int = Query(24, ge=1, le=168),
min_reliability: Optional[float] = Query(None, ge=0, le=1),
db: Session = Depends(get_db)
):
"""
Get all information items
Args:
limit: Maximum number of items to return
offset: Number of items to skip
hours: Look back this many hours
min_reliability: Minimum reliability score
Returns:
List of information items
"""
since = datetime.utcnow() - timedelta(hours=hours)
query = db.query(InformationItem).filter(
InformationItem.collected_at >= since
)
if min_reliability is not None:
# Filter by reliability (would need to add mapping)
pass
items = query.order_by(
InformationItem.collected_at.desc()
).offset(offset).limit(limit).all()
return items
@router.get("/search", response_model=List[InformationItemSchema])
async def search_info(
q: str = Query(..., min_length=1),
category: Optional[CategoryType] = None,
from_date: Optional[datetime] = None,
to_date: Optional[datetime] = None,
limit: int = Query(50, ge=1, le=500),
offset: int = Query(0, ge=0),
db: Session = Depends(get_db)
):
"""
Search information items
Args:
q: Search query
category: Filter by category
from_date: Start date
to_date: End date
limit: Maximum number of results
offset: Number of results to skip
Returns:
List of matching information items
"""
query = db.query(InformationItem)
# Text search in title and summary
search_filter = (
InformationItem.title.ilike(f"%{q}%") |
InformationItem.summary.ilike(f"%{q}%")
)
query = query.filter(search_filter)
# Category filter
if category:
query = query.filter(InformationItem.category == category)
# Date filters
if from_date:
query = query.filter(InformationItem.collected_at >= from_date)
if to_date:
query = query.filter(InformationItem.collected_at <= to_date)
# Order and paginate
items = query.order_by(
InformationItem.collected_at.desc()
).offset(offset).limit(limit).all()
return items
@router.get("/trending", response_model=List[TrendingTopicSchema])
async def get_trending(
platform: Optional[str] = None,
limit: int = Query(10, ge=1, le=50),
hours: int = Query(24, ge=1, le=168),
db: Session = Depends(get_db)
):
"""
Get trending topics
Args:
platform: Filter by platform (twitter, instagram, etc.)
limit: Maximum number of topics
hours: Look back this many hours
Returns:
List of trending topics
"""
since = datetime.utcnow() - timedelta(hours=hours)
query = db.query(TrendingTopic).filter(
TrendingTopic.last_updated >= since
)
if platform:
query = query.filter(TrendingTopic.platform == platform)
topics = query.order_by(
TrendingTopic.mention_count.desc()
).limit(limit).all()
return topics
@router.get("/alerts", response_model=List[AlertSchema])
async def get_alerts(
alert_type: Optional[str] = None,
severity: Optional[str] = None,
active_only: bool = True,
db: Session = Depends(get_db)
):
"""
Get current alerts
Args:
alert_type: Filter by type (traffic, weather, security, etc.)
severity: Filter by severity (low, medium, high, critical)
active_only: Only return active alerts
Returns:
List of alerts
"""
query = db.query(Alert)
if active_only:
query = query.filter(Alert.is_active == True)
if alert_type:
query = query.filter(Alert.alert_type == alert_type)
if severity:
query = query.filter(Alert.severity == severity)
alerts = query.order_by(Alert.created_at.desc()).all()
return alerts
@router.get("/stats", response_model=CollectionStats)
async def get_stats(db: Session = Depends(get_db)):
"""
Get collection statistics
Returns:
Statistics about collected data
"""
# Total items
total_items = db.query(InformationItem).count()
# Items by category
items_by_category = {}
for category in CategoryType:
count = db.query(InformationItem).filter(
InformationItem.category == category
).count()
items_by_category[category.value] = count
# Items by source
from sqlalchemy import func
items_by_source_query = db.query(
InformationItem.source_name,
func.count(InformationItem.id)
).group_by(InformationItem.source_name).all()
items_by_source = {
source: count for source, count in items_by_source_query
}
# Latest collection
latest = db.query(InformationItem).order_by(
InformationItem.collected_at.desc()
).first()
latest_collection = latest.collected_at if latest else None
# Active alerts
active_alerts = db.query(Alert).filter(Alert.is_active == True).count()
# Trending topics
trending_count = db.query(TrendingTopic).count()
return CollectionStats(
total_items=total_items,
items_by_category=items_by_category,
items_by_source=items_by_source,
latest_collection=latest_collection,
active_alerts=active_alerts,
trending_topics_count=trending_count
)
@router.get("/health")
async def health_check():
"""Health check endpoint"""
return {
"status": "healthy",
"timestamp": datetime.utcnow().isoformat()
}

nairobi-info-collector/app/collectors/__init__.py
@@ -0,0 +1,18 @@
"""
Data collectors for various sources
"""
from .base_collector import BaseCollector
from .news_collector import NewsCollector
from .social_media_collector import SocialMediaCollector
from .government_collector import GovernmentCollector
from .tourism_collector import TourismCollector
from .business_collector import BusinessCollector
__all__ = [
"BaseCollector",
"NewsCollector",
"SocialMediaCollector",
"GovernmentCollector",
"TourismCollector",
"BusinessCollector"
]

nairobi-info-collector/app/collectors/base_collector.py
@@ -0,0 +1,274 @@
"""
Base collector class for all data collection operations
"""
import logging
import time
from abc import ABC, abstractmethod
from typing import List, Dict, Optional, Any
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import hashlib
from tenacity import retry, stop_after_attempt, wait_exponential
from app.config import get_settings
from app.models.data_models import (
InformationItem, Source, CategoryType, ReliabilityLevel
)
from sqlalchemy.orm import Session
logger = logging.getLogger(__name__)
settings = get_settings()
class BaseCollector(ABC):
"""
Base class for all data collectors
Provides common functionality for:
- HTTP requests with retries
- Rate limiting
- Caching
- Data normalization
- Error handling
"""
def __init__(self, db: Session, source_name: str, source_type: str):
"""
Initialize collector
Args:
db: Database session
source_name: Name of the source
source_type: Type of source (news, social_media, etc.)
"""
self.db = db
self.source_name = source_name
self.source_type = source_type
self.settings = settings
# Get or create source in database
self.source = self._get_or_create_source()
# Request session
self.session = requests.Session()
self.session.headers.update({
'User-Agent': settings.user_agent
})
# Rate limiting
self.request_count = 0
self.last_request_time = 0
self.min_request_interval = 60 / settings.rate_limit_requests_per_minute
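# With the default 60 requests per minute this spaces calls roughly one second
# apart; _make_request() sleeps off any remaining interval before each call.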
def _get_or_create_source(self) -> Source:
"""Get or create source in database"""
source = self.db.query(Source).filter(
Source.name == self.source_name
).first()
if not source:
source = Source(
name=self.source_name,
source_type=self.source_type,
reliability_score=0.5,
is_active=True
)
self.db.add(source)
self.db.commit()
self.db.refresh(source)
logger.info(f"Created new source: {self.source_name}")
return source
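# Retry up to 3 attempts with exponential backoff (waits bounded to 2-10 s)
# whenever the request below raises a requests exception.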
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=2, max=10)
)
def _make_request(
self,
url: str,
method: str = "GET",
**kwargs
) -> Optional[requests.Response]:
"""
Make HTTP request with retry logic and rate limiting
Args:
url: URL to request
method: HTTP method
**kwargs: Additional arguments for requests
Returns:
Response object or None if failed
"""
# Rate limiting
elapsed = time.time() - self.last_request_time
if elapsed < self.min_request_interval:
time.sleep(self.min_request_interval - elapsed)
try:
logger.debug(f"Requesting: {url}")
response = self.session.request(
method=method,
url=url,
timeout=settings.request_timeout_seconds,
**kwargs
)
response.raise_for_status()
self.last_request_time = time.time()
self.request_count += 1
return response
except requests.exceptions.RequestException as e:
logger.error(f"Request failed for {url}: {e}")
raise
def _parse_html(self, html: str) -> BeautifulSoup:
"""
Parse HTML content
Args:
html: HTML string
Returns:
BeautifulSoup object
"""
return BeautifulSoup(html, 'lxml')
def _generate_item_hash(self, title: str, url: str) -> str:
"""
Generate unique hash for an item
Args:
title: Item title
url: Item URL
Returns:
Hash string
"""
content = f"{title}{url}".encode('utf-8')
return hashlib.md5(content).hexdigest()
def _item_exists(self, title: str, url: str) -> bool:
"""
Check if item already exists in database
Args:
title: Item title
url: Item URL
Returns:
True if exists, False otherwise
"""
existing = self.db.query(InformationItem).filter(
InformationItem.title == title,
InformationItem.url == url
).first()
return existing is not None
def _save_item(self, item_data: Dict[str, Any]) -> Optional[InformationItem]:
"""
Save information item to database
Args:
item_data: Dictionary with item data
Returns:
Saved InformationItem or None
"""
try:
# Check if already exists
if self._item_exists(item_data.get('title', ''), item_data.get('url', '')):
logger.debug(f"Item already exists: {item_data.get('title')}")
return None
# Create item
item = InformationItem(
title=item_data.get('title'),
summary=item_data.get('summary'),
content=item_data.get('content'),
category=item_data.get('category', CategoryType.NEWS),
url=item_data.get('url'),
image_url=item_data.get('image_url'),
source_id=self.source.id,
source_name=self.source_name,
reliability_level=item_data.get(
'reliability_level',
ReliabilityLevel.MEDIUM
),
published_at=item_data.get('published_at'),
location=item_data.get('location'),
coordinates=item_data.get('coordinates'),
tags=item_data.get('tags', []),
entities=item_data.get('entities', {}),
is_verified=item_data.get('is_verified', False),
is_alert=item_data.get('is_alert', False)
)
self.db.add(item)
self.db.commit()
self.db.refresh(item)
logger.info(f"Saved item: {item.title[:50]}...")
return item
except Exception as e:
logger.error(f"Error saving item: {e}")
self.db.rollback()
return None
@abstractmethod
def collect(self) -> List[InformationItem]:
"""
Collect data from source
Must be implemented by subclasses
Returns:
List of collected InformationItem objects
"""
pass
def run(self) -> Dict[str, Any]:
"""
Run the collector
Returns:
Dictionary with collection results
"""
start_time = time.time()
logger.info(f"Starting collection from {self.source_name}")
try:
items = self.collect()
elapsed = time.time() - start_time
result = {
'source': self.source_name,
'items_collected': len(items),
'elapsed_seconds': round(elapsed, 2),
'success': True
}
logger.info(
f"Collection completed: {len(items)} items in {elapsed:.2f}s"
)
return result
except Exception as e:
logger.error(f"Collection failed for {self.source_name}: {e}")
return {
'source': self.source_name,
'items_collected': 0,
'elapsed_seconds': 0,
'success': False,
'error': str(e)
}

nairobi-info-collector/app/collectors/business_collector.py
@@ -0,0 +1,148 @@
"""
Business and economy data collector
"""
import logging
from typing import List
from datetime import datetime
from app.collectors.base_collector import BaseCollector
from app.models.data_models import InformationItem, CategoryType, ReliabilityLevel
from app.config import DATA_SOURCES
logger = logging.getLogger(__name__)
class BusinessCollector(BaseCollector):
"""
Collector for business and economy information
Sources:
- TechCabal
- Business Daily
- Startup news
- Investment announcements
"""
def __init__(self, db):
super().__init__(db, "Business Collector", "business")
self.config = DATA_SOURCES.get("business", {})
def collect(self) -> List[InformationItem]:
"""Collect business news"""
all_items = []
all_items.extend(self._collect_techcabal())
return all_items
def _collect_techcabal(self) -> List[InformationItem]:
"""
Collect tech and startup news from TechCabal
Returns:
List of information items
"""
items = []
config = self.config.get("techcabal", {})
if not config.get("enabled"):
return items
url = config.get("url")
try:
response = self._make_request(url)
if not response:
return items
soup = self._parse_html(response.text)
# Find articles
articles = soup.find_all(['article', 'div'], class_=lambda x: x and (
'article' in x.lower() or
'post' in x.lower() or
'story' in x.lower()
))
for article in articles[:self.settings.max_items_per_source]:
try:
# Extract title
title_elem = article.find(['h1', 'h2', 'h3'])
if not title_elem:
continue
title = title_elem.get_text(strip=True)
# Filter for Nairobi/Kenya related content
if not any(word in title.lower() for word in [
'nairobi', 'kenya', 'kenyan', 'east africa'
]):
continue
# Extract link
link_elem = article.find('a', href=True)
if not link_elem:
continue
link = link_elem['href']
if link.startswith('/'):
from urllib.parse import urljoin
link = urljoin(url, link)
# Extract excerpt
excerpt_elem = article.find(['p', 'div'], class_=lambda x: x and (
'excerpt' in x.lower() or
'summary' in x.lower()
))
excerpt = excerpt_elem.get_text(strip=True) if excerpt_elem else ""
# Extract image
image_url = None
img_elem = article.find('img', src=True)
if img_elem:
image_url = img_elem['src']
if image_url.startswith('/'):
from urllib.parse import urljoin
image_url = urljoin(url, image_url)
# Extract date
date_elem = article.find(['time', 'span'], class_=lambda x: x and 'date' in x.lower())
published_at = None
if date_elem and date_elem.get('datetime'):
try:
published_at = datetime.fromisoformat(
date_elem['datetime'].replace('Z', '+00:00')
)
except (ValueError, TypeError):
pass
# Extract tags
tags = ['business', 'tech', 'startup']
if 'investment' in title.lower() or 'funding' in excerpt.lower():
tags.append('investment')
if 'startup' in title.lower() or 'startup' in excerpt.lower():
tags.append('startup')
item_data = {
'title': title,
'summary': excerpt[:500] if excerpt else None,
'url': link,
'image_url': image_url,
'category': CategoryType.ECONOMY,
'published_at': published_at,
'reliability_level': ReliabilityLevel.HIGH,
'tags': tags,
'is_verified': True
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error processing TechCabal article: {e}")
except Exception as e:
logger.error(f"Error collecting from TechCabal: {e}")
return items

nairobi-info-collector/app/collectors/government_collector.py
@@ -0,0 +1,213 @@
"""
Government and public services data collector
"""
import logging
from typing import List
from datetime import datetime
from app.collectors.base_collector import BaseCollector
from app.models.data_models import (
InformationItem, Alert, CategoryType, ReliabilityLevel
)
from app.config import DATA_SOURCES
logger = logging.getLogger(__name__)
class GovernmentCollector(BaseCollector):
"""
Collector for government and public service information
Sources:
- Nairobi City County
- Kenya Open Data Portal
- NTSA (traffic/road updates)
- Public service announcements
"""
def __init__(self, db):
super().__init__(db, "Government Collector", "government")
self.config = DATA_SOURCES.get("government", {})
def collect(self) -> List[InformationItem]:
"""Collect government and public data"""
all_items = []
all_items.extend(self._collect_nairobi_county())
all_items.extend(self._collect_open_data())
return all_items
def _collect_nairobi_county(self) -> List[InformationItem]:
"""
Collect from Nairobi City County website
Returns:
List of information items
"""
items = []
config = self.config.get("nairobi_county", {})
if not config.get("enabled"):
return items
url = config.get("url")
try:
response = self._make_request(url)
if not response:
return items
soup = self._parse_html(response.text)
# Find announcements and news
announcements = soup.find_all(['div', 'article'], class_=lambda x: x and (
'announcement' in x.lower() or
'news' in x.lower() or
'notice' in x.lower()
))
for announcement in announcements[:self.settings.max_items_per_source]:
try:
# Extract title
title_elem = announcement.find(['h1', 'h2', 'h3', 'h4'])
if not title_elem:
continue
title = title_elem.get_text(strip=True)
# Extract content
content_elem = announcement.find(['p', 'div'], class_=lambda x: x and 'content' in x.lower())
content = content_elem.get_text(strip=True) if content_elem else ""
# Extract link
link_elem = announcement.find('a', href=True)
link = link_elem['href'] if link_elem else url
if link.startswith('/'):
from urllib.parse import urljoin
link = urljoin(url, link)
# Check if it's an alert
is_alert = any(word in title.lower() for word in [
'alert', 'urgent', 'warning', 'closure', 'disruption'
])
# Categorize
category = self._categorize_government_content(title, content)
item_data = {
'title': title,
'summary': content[:500] if content else None,
'content': content,
'url': link,
'category': category,
'reliability_level': ReliabilityLevel.VERIFIED,
'tags': ['government', 'nairobi county'],
'is_verified': True,
'is_alert': is_alert
}
item = self._save_item(item_data)
if item:
items.append(item)
# Create alert if necessary
if is_alert:
self._create_alert(title, content, link)
except Exception as e:
logger.error(f"Error processing announcement: {e}")
except Exception as e:
logger.error(f"Error collecting from Nairobi County: {e}")
return items
def _collect_open_data(self) -> List[InformationItem]:
"""
Collect from Kenya Open Data Portal
Returns:
List of information items
"""
items = []
config = self.config.get("kenya_open_data", {})
if not config.get("enabled"):
return items
# Kenya Open Data typically provides datasets via API
# This is a simplified example - you'd want to use their API properly
logger.info("Kenya Open Data collection - placeholder for API integration")
return items
def _categorize_government_content(self, title: str, content: str) -> CategoryType:
"""Categorize government content"""
text = f"{title} {content}".lower()
if any(word in text for word in ['traffic', 'road', 'transport', 'closure']):
return CategoryType.TRAVEL
if any(word in text for word in ['event', 'ceremony', 'launch']):
return CategoryType.EVENTS
if any(word in text for word in ['business', 'permit', 'license', 'tender']):
return CategoryType.ECONOMY
return CategoryType.NEWS
def _create_alert(self, title: str, message: str, url: str) -> None:
"""
Create a public alert
Args:
title: Alert title
message: Alert message
url: Source URL
"""
try:
# Determine alert type and severity
alert_type = "general"
severity = "medium"
text = f"{title} {message}".lower()
if any(word in text for word in ['traffic', 'road']):
alert_type = "traffic"
if any(word in text for word in ['water', 'electricity', 'power']):
alert_type = "utility"
if any(word in text for word in ['security', 'safety']):
alert_type = "security"
if any(word in text for word in ['urgent', 'critical', 'emergency']):
severity = "high"
# Check if alert already exists
existing = self.db.query(Alert).filter(
Alert.title == title,
Alert.is_active == True
).first()
if not existing:
alert = Alert(
title=title,
message=message,
alert_type=alert_type,
severity=severity,
source_name="Nairobi City County",
url=url,
is_active=True
)
self.db.add(alert)
self.db.commit()
logger.info(f"Created alert: {title}")
except Exception as e:
logger.error(f"Error creating alert: {e}")
self.db.rollback()

nairobi-info-collector/app/collectors/news_collector.py
@@ -0,0 +1,340 @@
"""
News collector for various Kenyan news sources
"""
import logging
from typing import List, Optional
from datetime import datetime
from bs4 import BeautifulSoup
import feedparser
from app.collectors.base_collector import BaseCollector
from app.models.data_models import InformationItem, CategoryType, ReliabilityLevel
from app.config import DATA_SOURCES
logger = logging.getLogger(__name__)
class NewsCollector(BaseCollector):
"""
Collector for news sources
Supports:
- Nation Africa
- Standard Media
- Citizen Digital
- BBC Africa
- Business Daily
"""
def __init__(self, db, news_source: str = "all"):
"""
Initialize news collector
Args:
db: Database session
news_source: Specific news source or "all"
"""
super().__init__(db, "News Collector", "news")
self.news_source = news_source
self.sources_config = DATA_SOURCES.get("news", {})
def collect(self) -> List[InformationItem]:
"""Collect news from configured sources"""
all_items = []
if self.news_source == "all":
sources = self.sources_config.items()
else:
source_config = self.sources_config.get(self.news_source)
if source_config:
sources = [(self.news_source, source_config)]
else:
logger.error(f"Unknown news source: {self.news_source}")
return []
for source_name, config in sources:
if not config.get("enabled", False):
logger.info(f"Skipping disabled source: {source_name}")
continue
logger.info(f"Collecting from {source_name}")
try:
items = self._collect_from_source(source_name, config)
all_items.extend(items)
except Exception as e:
logger.error(f"Error collecting from {source_name}: {e}")
return all_items
def _collect_from_source(
self,
source_name: str,
config: dict
) -> List[InformationItem]:
"""
Collect from a specific news source
Args:
source_name: Name of the source
config: Source configuration
Returns:
List of collected items
"""
items = []
url = config.get("url")
reliability = config.get("reliability", 0.5)
# Try RSS feed first
rss_url = config.get("rss_url")
if rss_url:
items.extend(self._collect_from_rss(rss_url, source_name, reliability))
# Try web scraping if RSS not available or failed
if not items and url:
items.extend(self._collect_from_web(url, source_name, reliability))
return items
def _collect_from_rss(
self,
rss_url: str,
source_name: str,
reliability: float
) -> List[InformationItem]:
"""
Collect news from RSS feed
Args:
rss_url: RSS feed URL
source_name: Name of the source
reliability: Reliability score
Returns:
List of collected items
"""
items = []
try:
feed = feedparser.parse(rss_url)
for entry in feed.entries[:self.settings.max_items_per_source]:
try:
# Parse published date
published_at = None
if hasattr(entry, 'published_parsed') and entry.published_parsed:
published_at = datetime(*entry.published_parsed[:6])
# Extract summary
summary = ""
if hasattr(entry, 'summary'):
summary = BeautifulSoup(entry.summary, 'html.parser').get_text()
# Determine category
category = self._categorize_content(
entry.title,
summary
)
item_data = {
'title': entry.title,
'summary': summary[:500] if summary else None,
'url': entry.link,
'category': category,
'published_at': published_at,
'reliability_level': self._reliability_to_enum(reliability),
'tags': self._extract_tags(entry.title, summary),
'is_verified': reliability >= 0.8
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error processing RSS entry: {e}")
except Exception as e:
logger.error(f"Error fetching RSS feed {rss_url}: {e}")
return items
def _collect_from_web(
self,
url: str,
source_name: str,
reliability: float
) -> List[InformationItem]:
"""
Collect news by web scraping
Args:
url: Website URL
source_name: Name of the source
reliability: Reliability score
Returns:
List of collected items
"""
items = []
try:
response = self._make_request(url)
if not response:
return items
soup = self._parse_html(response.text)
# Generic article extraction
articles = soup.find_all(['article', 'div'], class_=lambda x: x and (
'article' in x.lower() or
'story' in x.lower() or
'post' in x.lower()
))
for article in articles[:self.settings.max_items_per_source]:
try:
# Extract title
title_elem = article.find(['h1', 'h2', 'h3', 'h4'])
if not title_elem:
continue
title = title_elem.get_text(strip=True)
# Extract link
link_elem = article.find('a', href=True)
if not link_elem:
continue
link = link_elem['href']
if link.startswith('/'):
from urllib.parse import urljoin
link = urljoin(url, link)
# Extract summary
summary_elem = article.find(['p', 'div'], class_=lambda x: x and (
'summary' in x.lower() or
'excerpt' in x.lower() or
'description' in x.lower()
))
summary = summary_elem.get_text(strip=True) if summary_elem else ""
# Extract image
image_url = None
img_elem = article.find('img', src=True)
if img_elem:
image_url = img_elem['src']
if image_url.startswith('/'):
from urllib.parse import urljoin
image_url = urljoin(url, image_url)
# Categorize
category = self._categorize_content(title, summary)
item_data = {
'title': title,
'summary': summary[:500] if summary else None,
'url': link,
'image_url': image_url,
'category': category,
'reliability_level': self._reliability_to_enum(reliability),
'tags': self._extract_tags(title, summary),
'is_verified': reliability >= 0.8
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error processing article: {e}")
except Exception as e:
logger.error(f"Error scraping {url}: {e}")
return items
def _categorize_content(self, title: str, content: str) -> CategoryType:
"""
Categorize content based on title and content
Args:
title: Article title
content: Article content
Returns:
CategoryType enum
"""
text = f"{title} {content}".lower()
# Breaking news
if any(word in text for word in ['breaking', 'urgent', 'just in', 'alert']):
return CategoryType.BREAKING
# Events
if any(word in text for word in ['event', 'concert', 'festival', 'exhibition']):
return CategoryType.EVENTS
# Economy/Business
if any(word in text for word in ['economy', 'business', 'market', 'trade', 'investment']):
return CategoryType.ECONOMY
# Food/Nightlife
if any(word in text for word in ['restaurant', 'food', 'dining', 'nightlife']):
return CategoryType.FOOD
# Travel/Transport
if any(word in text for word in ['traffic', 'transport', 'road', 'airport']):
return CategoryType.TRAVEL
# Default to news
return CategoryType.NEWS
def _extract_tags(self, title: str, content: str) -> list:
"""
Extract relevant tags from content
Args:
title: Article title
content: Article content
Returns:
List of tags
"""
tags = []
text = f"{title} {content}".lower()
# Common Nairobi locations
locations = [
'westlands', 'kileleshwa', 'karen', 'ngong', 'cbd',
'kilimani', 'lavington', 'parklands', 'eastleigh'
]
for loc in locations:
if loc in text:
tags.append(loc)
# Topics
topics = [
'politics', 'sports', 'entertainment', 'technology',
'health', 'education', 'crime', 'weather'
]
for topic in topics:
if topic in text:
tags.append(topic)
return list(set(tags))
@staticmethod
def _reliability_to_enum(score: float) -> ReliabilityLevel:
"""Convert reliability score to enum"""
if score >= 0.9:
return ReliabilityLevel.VERIFIED
elif score >= 0.7:
return ReliabilityLevel.HIGH
elif score >= 0.5:
return ReliabilityLevel.MEDIUM
elif score >= 0.3:
return ReliabilityLevel.LOW
else:
return ReliabilityLevel.UNVERIFIED

nairobi-info-collector/app/collectors/social_media_collector.py
@@ -0,0 +1,310 @@
"""
Social media collector for Twitter, Instagram, TikTok, etc.
"""
import logging
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
import json
from app.collectors.base_collector import BaseCollector
from app.models.data_models import (
InformationItem, TrendingTopic, CategoryType, ReliabilityLevel
)
from app.config import DATA_SOURCES, get_settings
logger = logging.getLogger(__name__)
settings = get_settings()
class SocialMediaCollector(BaseCollector):
"""
Collector for social media platforms
Supports:
- Twitter/X (via API)
- Instagram (via unofficial API)
- TikTok trending
- Facebook (via Graph API)
"""
def __init__(self, db, platform: str = "all"):
"""
Initialize social media collector
Args:
db: Database session
platform: Specific platform or "all"
"""
super().__init__(db, "Social Media Collector", "social_media")
self.platform = platform
self.config = DATA_SOURCES.get("social_media", {})
def collect(self) -> List[InformationItem]:
"""Collect social media data"""
all_items = []
if self.platform == "all" or self.platform == "twitter":
all_items.extend(self._collect_twitter())
if self.platform == "all" or self.platform == "instagram":
all_items.extend(self._collect_instagram())
if self.platform == "all" or self.platform == "tiktok":
all_items.extend(self._collect_tiktok())
return all_items
def _collect_twitter(self) -> List[InformationItem]:
"""
Collect trending topics and posts from Twitter/X
Returns:
List of information items
"""
items = []
if not settings.twitter_bearer_token:
logger.warning("Twitter API credentials not configured")
return items
try:
import tweepy
# Initialize Twitter API client
client = tweepy.Client(bearer_token=settings.twitter_bearer_token)
hashtags = self.config.get("twitter", {}).get("hashtags", [])
for hashtag in hashtags:
try:
# Search recent tweets
tweets = client.search_recent_tweets(
query=f"{hashtag} -is:retweet lang:en",
max_results=20,
tweet_fields=['created_at', 'public_metrics', 'entities']
)
if not tweets.data:
continue
for tweet in tweets.data:
# Skip if low engagement
metrics = tweet.public_metrics
engagement = (
metrics.get('like_count', 0) +
metrics.get('retweet_count', 0) * 2 +
metrics.get('reply_count', 0)
)
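# Retweets are weighted double in the engagement score as a rough proxy for reach.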
if engagement < 10: # Minimum engagement threshold
continue
# Extract entities
entities = {}
if hasattr(tweet, 'entities'):
if 'hashtags' in tweet.entities:
entities['hashtags'] = [
tag['tag'] for tag in tweet.entities['hashtags']
]
if 'mentions' in tweet.entities:
entities['mentions'] = [
m['username'] for m in tweet.entities['mentions']
]
# Determine if trending
is_trending = engagement > 100
item_data = {
'title': f"Tweet: {tweet.text[:100]}...",
'summary': tweet.text,
'url': f"https://twitter.com/i/status/{tweet.id}",
'category': CategoryType.SOCIAL,
'published_at': tweet.created_at,
'reliability_level': ReliabilityLevel.MEDIUM,
'tags': [hashtag.replace('#', '')],
'entities': entities,
'is_featured': is_trending
}
item = self._save_item(item_data)
if item:
items.append(item)
# Track trending topic
if is_trending:
self._track_trending_topic(
hashtag,
'twitter',
engagement,
{'tweet_id': tweet.id, 'text': tweet.text}
)
except Exception as e:
logger.error(f"Error collecting Twitter data for {hashtag}: {e}")
except ImportError:
logger.error("tweepy not installed. Run: pip install tweepy")
except Exception as e:
logger.error(f"Error in Twitter collection: {e}")
return items
def _collect_instagram(self) -> List[InformationItem]:
"""
Collect trending posts from Instagram
Returns:
List of information items
"""
items = []
if not settings.instagram_username or not settings.instagram_password:
logger.warning("Instagram credentials not configured")
return items
try:
from instagrapi import Client
client = Client()
client.login(settings.instagram_username, settings.instagram_password)
hashtags = self.config.get("instagram", {}).get("hashtags", [])
for hashtag in hashtags:
try:
# Get top posts for hashtag
medias = client.hashtag_medias_top(hashtag, amount=20)
for media in medias:
# Get media info
like_count = media.like_count
comment_count = media.comment_count
# Skip low engagement
if like_count < 50:
continue
item_data = {
'title': f"Instagram Post: {media.caption_text[:100] if media.caption_text else 'No caption'}",
'summary': media.caption_text[:500] if media.caption_text else "",
'url': f"https://www.instagram.com/p/{media.code}/",
'image_url': media.thumbnail_url,
'category': CategoryType.SOCIAL,
'published_at': media.taken_at,
'reliability_level': ReliabilityLevel.MEDIUM,
'tags': [hashtag],
'is_featured': like_count > 500
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error collecting Instagram data for {hashtag}: {e}")
except ImportError:
logger.error("instagrapi not installed. Run: pip install instagrapi")
except Exception as e:
logger.error(f"Error in Instagram collection: {e}")
return items
def _collect_tiktok(self) -> List[InformationItem]:
"""
Collect trending videos from TikTok
Returns:
List of information items
"""
items = []
# Note: TikTok API access is limited. This is a placeholder for future implementation
# You would need TikTok API credentials and use their official API
logger.info("TikTok collection not yet implemented")
return items
def _track_trending_topic(
self,
topic: str,
platform: str,
mention_count: int,
metadata: Dict[str, Any]
) -> None:
"""
Track a trending topic in the database
Args:
topic: The trending topic/hashtag
platform: Social media platform
mention_count: Number of mentions
metadata: Additional metadata
"""
try:
# Check if topic already exists
existing = self.db.query(TrendingTopic).filter(
TrendingTopic.topic == topic,
TrendingTopic.platform == platform
).first()
if existing:
# Update existing
existing.mention_count += mention_count
existing.last_updated = datetime.utcnow()
if existing.related_content:
existing.related_content.append(metadata)
else:
existing.related_content = [metadata]
else:
# Create new
trending = TrendingTopic(
topic=topic,
platform=platform,
mention_count=mention_count,
related_content=[metadata]
)
self.db.add(trending)
self.db.commit()
except Exception as e:
logger.error(f"Error tracking trending topic: {e}")
self.db.rollback()
def get_trending_topics(self, platform: Optional[str] = None, limit: int = 10) -> List[Dict]:
"""
Get current trending topics
Args:
platform: Filter by platform
limit: Maximum number of topics to return
Returns:
List of trending topics
"""
query = self.db.query(TrendingTopic)
if platform:
query = query.filter(TrendingTopic.platform == platform)
# Get topics from last 24 hours
since = datetime.utcnow() - timedelta(days=1)
query = query.filter(TrendingTopic.last_updated >= since)
# Order by mention count
topics = query.order_by(
TrendingTopic.mention_count.desc()
).limit(limit).all()
return [
{
'topic': t.topic,
'platform': t.platform,
'mention_count': t.mention_count,
'first_seen': t.first_seen.isoformat() if t.first_seen else None,
'last_updated': t.last_updated.isoformat() if t.last_updated else None
}
for t in topics
]

nairobi-info-collector/app/collectors/tourism_collector.py
@@ -0,0 +1,221 @@
"""
Tourism and hospitality data collector
"""
import logging
from typing import List, Optional
from datetime import datetime
from app.collectors.base_collector import BaseCollector
from app.models.data_models import InformationItem, CategoryType, ReliabilityLevel
from app.config import DATA_SOURCES, get_settings
logger = logging.getLogger(__name__)
settings = get_settings()
class TourismCollector(BaseCollector):
"""
Collector for tourism and hospitality information
Sources:
- Google Maps/Places API (restaurants, hotels, attractions)
- TripAdvisor
- Tourism websites
"""
def __init__(self, db):
super().__init__(db, "Tourism Collector", "tourism")
self.config = DATA_SOURCES.get("tourism", {})
def collect(self) -> List[InformationItem]:
"""Collect tourism data"""
all_items = []
all_items.extend(self._collect_google_places())
all_items.extend(self._collect_tripadvisor())
return all_items
def _collect_google_places(self) -> List[InformationItem]:
"""
Collect new places and reviews from Google Maps
Returns:
List of information items
"""
items = []
if not settings.google_maps_api_key:
logger.warning("Google Maps API key not configured")
return items
try:
import googlemaps
gmaps = googlemaps.Client(key=settings.google_maps_api_key)
# Nairobi coordinates
location = (-1.286389, 36.817223)
# Search for different types of places
place_types = [
'restaurant',
'cafe',
'bar',
'hotel',
'tourist_attraction',
'museum'
]
for place_type in place_types:
try:
# Search for recently added places
results = gmaps.places_nearby(
location=location,
radius=10000, # 10km radius
type=place_type,
keyword='new OR opening'
)
for place in results.get('results', [])[:20]:
try:
place_id = place.get('place_id')
# Get place details
details = gmaps.place(
place_id=place_id,
fields=[
'name', 'rating', 'formatted_address',
'opening_hours', 'photos', 'reviews', 'website'
]
).get('result', {})
name = details.get('name', '')
rating = details.get('rating', 0)
address = details.get('formatted_address', '')
website = details.get('website')
# Get photo URL
image_url = None
photos = details.get('photos', [])
if photos:
photo_reference = photos[0].get('photo_reference')
image_url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference={photo_reference}&key={settings.google_maps_api_key}"
# Get recent review
reviews = details.get('reviews', [])
recent_review = reviews[0].get('text', '') if reviews else ''
# Determine category
category = CategoryType.PLACES
if place_type in ['restaurant', 'cafe']:
category = CategoryType.FOOD
item_data = {
'title': f"New {place_type.replace('_', ' ').title()}: {name}",
'summary': f"Rating: {rating}/5.0 - {address}",
'content': recent_review[:500] if recent_review else None,
'url': website or f"https://www.google.com/maps/place/?q=place_id:{place_id}",
'image_url': image_url,
'category': category,
'location': address,
'coordinates': {
'lat': place.get('geometry', {}).get('location', {}).get('lat'),
'lng': place.get('geometry', {}).get('location', {}).get('lng')
},
'reliability_level': ReliabilityLevel.HIGH,
'tags': [place_type, 'new opening'],
'is_verified': True
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error processing place: {e}")
except Exception as e:
logger.error(f"Error searching for {place_type}: {e}")
except ImportError:
logger.error("googlemaps not installed. Run: pip install googlemaps")
except Exception as e:
logger.error(f"Error in Google Places collection: {e}")
return items
def _collect_tripadvisor(self) -> List[InformationItem]:
"""
Collect reviews and updates from TripAdvisor
Note: TripAdvisor API access is limited. This is a web scraping approach.
Returns:
List of information items
"""
items = []
config = self.config.get("tripadvisor", {})
if not config.get("enabled"):
return items
url = config.get("url")
try:
response = self._make_request(url)
if not response:
return items
soup = self._parse_html(response.text)
# Find attraction/restaurant listings
listings = soup.find_all(['div'], class_=lambda x: x and (
'listing' in x.lower() or
'attraction' in x.lower()
))
for listing in listings[:self.settings.max_items_per_source]:
try:
# Extract name
name_elem = listing.find(['h2', 'h3'], class_=lambda x: x and 'title' in x.lower())
if not name_elem:
continue
name = name_elem.get_text(strip=True)
# Extract rating
rating_elem = listing.find(class_=lambda x: x and 'rating' in x.lower())
rating = rating_elem.get_text(strip=True) if rating_elem else ""
# Extract link
link_elem = listing.find('a', href=True)
link = link_elem['href'] if link_elem else ""
if link.startswith('/'):
link = f"https://www.tripadvisor.com{link}"
# Extract review snippet
review_elem = listing.find(class_=lambda x: x and 'review' in x.lower())
review = review_elem.get_text(strip=True) if review_elem else ""
item_data = {
'title': name,
'summary': f"{rating} - {review[:200]}",
'url': link,
'category': CategoryType.PLACES,
'reliability_level': ReliabilityLevel.MEDIUM,
'tags': ['tripadvisor', 'tourism'],
'is_verified': False
}
item = self._save_item(item_data)
if item:
items.append(item)
except Exception as e:
logger.error(f"Error processing TripAdvisor listing: {e}")
except Exception as e:
logger.error(f"Error collecting from TripAdvisor: {e}")
return items

nairobi-info-collector/app/config.py
@@ -0,0 +1,250 @@
"""
Configuration management for Nairobi Information Collector
"""
from pydantic_settings import BaseSettings
from typing import List, Optional
from functools import lru_cache
class Settings(BaseSettings):
"""Application settings loaded from environment variables"""
# Application
app_name: str = "Nairobi Information Collector"
app_version: str = "1.0.0"
debug: bool = False
environment: str = "production"
# Server
host: str = "0.0.0.0"
port: int = 8000
# Database
database_url: str = "sqlite:///./nairobi_info.db"
# Redis
redis_url: str = "redis://localhost:6379/0"
redis_password: Optional[str] = None
# API Keys - News
news_api_key: Optional[str] = None
# API Keys - Social Media
twitter_api_key: Optional[str] = None
twitter_api_secret: Optional[str] = None
twitter_access_token: Optional[str] = None
twitter_access_secret: Optional[str] = None
twitter_bearer_token: Optional[str] = None
instagram_username: Optional[str] = None
instagram_password: Optional[str] = None
# API Keys - Maps
google_maps_api_key: Optional[str] = None
foursquare_api_key: Optional[str] = None
# API Keys - NLP
openai_api_key: Optional[str] = None
anthropic_api_key: Optional[str] = None
# Collection Settings
collection_interval_seconds: int = 300
max_items_per_source: int = 100
request_timeout_seconds: int = 30
max_retries: int = 3
# Rate Limiting
rate_limit_requests_per_minute: int = 60
rate_limit_requests_per_hour: int = 1000
# Scraping
user_agent: str = "Mozilla/5.0 (compatible; NairobiInfoBot/1.0)"
respect_robots_txt: bool = True
enable_caching: bool = True
cache_ttl_seconds: int = 3600
# Data Processing
enable_nlp_processing: bool = True
enable_sentiment_analysis: bool = True
enable_auto_categorization: bool = True
min_reliability_score: float = 0.5
# Logging
log_level: str = "INFO"
log_file: str = "logs/nairobi_collector.log"
# Security
secret_key: str = "change-this-in-production"
api_key_header: str = "X-API-Key"
allowed_origins: str = "http://localhost:3000,http://localhost:8000"
# Monitoring
sentry_dsn: Optional[str] = None
enable_metrics: bool = True
metrics_port: int = 9090
# Feature Flags
enable_social_media_collection: bool = True
enable_news_collection: bool = True
enable_government_collection: bool = True
enable_tourism_collection: bool = True
enable_business_collection: bool = True
# Email
smtp_host: str = "smtp.gmail.com"
smtp_port: int = 587
smtp_username: Optional[str] = None
smtp_password: Optional[str] = None
alert_email_recipients: Optional[str] = None
class Config:
env_file = ".env"
case_sensitive = False
@property
def allowed_origins_list(self) -> List[str]:
"""Parse allowed origins into a list"""
return [origin.strip() for origin in self.allowed_origins.split(",")]
@property
def alert_recipients_list(self) -> List[str]:
"""Parse alert recipients into a list"""
if not self.alert_email_recipients:
return []
return [email.strip() for email in self.alert_email_recipients.split(",")]
@lru_cache()
def get_settings() -> Settings:
"""Get cached settings instance"""
return Settings()
# Data source configurations
DATA_SOURCES = {
"news": {
"nation_africa": {
"url": "https://nation.africa/kenya/counties/nairobi",
"enabled": True,
"reliability": 0.9
},
"standard_media": {
"url": "https://www.standardmedia.co.ke/nairobi",
"enabled": True,
"reliability": 0.9
},
"citizen_digital": {
"url": "https://www.citizen.digital/news",
"enabled": True,
"reliability": 0.85
},
"bbc_africa": {
"url": "https://www.bbc.com/news/topics/c302m85q53mt",
"enabled": True,
"reliability": 0.95
},
"business_daily": {
"url": "https://www.businessdailyafrica.com/bd/economy",
"enabled": True,
"reliability": 0.9
}
},
"government": {
"nairobi_county": {
"url": "https://nairobi.go.ke",
"enabled": True,
"reliability": 1.0
},
"kenya_open_data": {
"url": "https://www.opendata.go.ke",
"enabled": True,
"reliability": 1.0
}
},
"tourism": {
"tripadvisor": {
"url": "https://www.tripadvisor.com/Tourism-g294207-Nairobi-Vacations.html",
"enabled": True,
"reliability": 0.8
},
"google_maps": {
"api_url": "https://maps.googleapis.com/maps/api/place",
"enabled": True,
"reliability": 0.85
}
},
"social_media": {
"twitter": {
"hashtags": [
"#Nairobi", "#NairobiKenya", "#VisitNairobi",
"#NairobiLife", "#254", "#KenyaNews"
],
"enabled": True,
"reliability": 0.6
},
"instagram": {
"hashtags": [
"nairobi", "nairobidiaries", "nairobikenya",
"visitnairobi", "nairobilife"
],
"enabled": True,
"reliability": 0.6
}
},
"business": {
"techcabal": {
"url": "https://techcabal.com/category/kenya/",
"enabled": True,
"reliability": 0.85
}
}
}
# Information categories
CATEGORIES = {
"breaking": {
"name": "Breaking Updates",
"keywords": ["breaking", "urgent", "alert", "just in", "developing"],
"priority": 1
},
"news": {
"name": "City Life & Alerts",
"keywords": ["news", "update", "announcement", "report"],
"priority": 2
},
"events": {
"name": "Culture & Events",
"keywords": ["event", "concert", "festival", "exhibition", "show"],
"priority": 3
},
"economy": {
"name": "Business & Economy",
"keywords": ["business", "economy", "startup", "investment", "market"],
"priority": 4
},
"food": {
"name": "Food & Nightlife",
"keywords": ["restaurant", "food", "dining", "nightlife", "bar", "cafe"],
"priority": 5
},
"social": {
"name": "Social Media Trends",
"keywords": ["trending", "viral", "hashtag"],
"priority": 6
},
"travel": {
"name": "Travel & Movement",
"keywords": ["traffic", "transport", "airport", "road", "transit"],
"priority": 7
},
"places": {
"name": "New Places / Reviews",
"keywords": ["opening", "new", "review", "rating"],
"priority": 8
},
"community": {
"name": "Community Stories",
"keywords": ["community", "story", "people", "charity", "initiative"],
"priority": 9
}
}
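
Editorial sketch, not part of config.py: how the cached settings, the DATA_SOURCES registry, and the CATEGORIES map are typically consumed together; the filtering shown here is illustrative only.

from app.config import get_settings, DATA_SOURCES, CATEGORIES

settings = get_settings()  # cached by lru_cache, so repeated calls reuse one Settings instance
enabled_news = {
    name: cfg
    for name, cfg in DATA_SOURCES["news"].items()
    if cfg["enabled"] and cfg["reliability"] >= settings.min_reliability_score
}
print("News sources to collect:", list(enabled_news))
print("Categories by priority:", sorted(CATEGORIES, key=lambda k: CATEGORIES[k]["priority"]))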

View File

@ -0,0 +1,6 @@
"""
Database connection and session management
"""
from .db import get_db, engine, SessionLocal, init_db
__all__ = ["get_db", "engine", "SessionLocal", "init_db"]

View File

@ -0,0 +1,72 @@
"""
Database connection and initialization
"""
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session
from typing import Generator
import logging
from app.config import get_settings
from app.models.data_models import Base
logger = logging.getLogger(__name__)
settings = get_settings()
# Create database engine
engine = create_engine(
settings.database_url,
echo=settings.debug,
pool_pre_ping=True,
pool_size=10,
max_overflow=20
)
# Create session factory
SessionLocal = sessionmaker(
autocommit=False,
autoflush=False,
bind=engine
)
def get_db() -> Generator[Session, None, None]:
"""
Get database session
Yields:
Database session
"""
db = SessionLocal()
try:
yield db
finally:
db.close()
def init_db() -> None:
"""
Initialize database - create all tables
"""
try:
logger.info("Creating database tables...")
Base.metadata.create_all(bind=engine)
logger.info("Database tables created successfully!")
except Exception as e:
logger.error(f"Error creating database tables: {e}")
raise
def drop_db() -> None:
"""
Drop all database tables (use with caution!)
"""
logger.warning("Dropping all database tables...")
Base.metadata.drop_all(bind=engine)
logger.info("Database tables dropped!")
if __name__ == "__main__":
# Initialize database when run directly
logging.basicConfig(level=logging.INFO)
init_db()
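
Editorial sketch: the usual way get_db() is wired into a FastAPI route as a generator dependency; the /items/count path and handler are hypothetical, not part of the repository.

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

from app.database import get_db
from app.models.data_models import InformationItem

router = APIRouter()

@router.get("/items/count")
def count_items(db: Session = Depends(get_db)) -> dict:
    # FastAPI opens one session per request and closes it in the generator's finally block.
    return {"count": db.query(InformationItem).count()}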

View File

@ -0,0 +1,119 @@
"""
Main FastAPI application
"""
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from app.config import get_settings
from app.database import init_db
from app.api.routes import router
from app.scheduler.tasks import start_scheduler, stop_scheduler
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('logs/app.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
settings = get_settings()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""
Application lifespan manager
Handles startup and shutdown events
"""
# Startup
logger.info("Starting Nairobi Information Collector")
# Initialize database
try:
init_db()
logger.info("Database initialized")
except Exception as e:
logger.error(f"Database initialization failed: {e}")
# Start scheduler
try:
start_scheduler()
logger.info("Scheduler started")
except Exception as e:
logger.error(f"Scheduler failed to start: {e}")
yield
# Shutdown
logger.info("Shutting down Nairobi Information Collector")
try:
stop_scheduler()
logger.info("Scheduler stopped")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
# Create FastAPI app
app = FastAPI(
title=settings.app_name,
version=settings.app_version,
description="Advanced Intelligence Retrieval System for Nairobi, Kenya",
lifespan=lifespan
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=settings.allowed_origins_list,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include API routes
app.include_router(router)
@app.get("/")
async def root():
"""Root endpoint"""
return {
"name": settings.app_name,
"version": settings.app_version,
"description": "Advanced Intelligence Retrieval System for Nairobi, Kenya",
"docs": "/docs",
"api": "/api/v1"
}
@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
"""Global exception handler"""
logger.error(f"Unhandled exception: {exc}", exc_info=True)
return JSONResponse(
status_code=500,
content={
"detail": "Internal server error",
"error": str(exc) if settings.debug else "An error occurred"
}
)
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host=settings.host,
port=settings.port,
reload=settings.debug
)
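
Editorial sketch, not shipped in the repo: a minimal smoke test using FastAPI's TestClient, which also exercises the lifespan startup and shutdown hooks (httpx is already pinned in requirements.txt).

from fastapi.testclient import TestClient

from app.main import app

def test_root_endpoint():
    # Entering the context runs the lifespan: init_db() and the scheduler start, then stop on exit.
    with TestClient(app) as client:
        response = client.get("/")
        assert response.status_code == 200
        assert response.json()["api"] == "/api/v1"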

View File

@ -0,0 +1,20 @@
"""
Data models for Nairobi Information Collector
"""
from .data_models import (
InformationItem,
InformationBrief,
Source,
Alert,
TrendingTopic,
Category
)
__all__ = [
"InformationItem",
"InformationBrief",
"Source",
"Alert",
"TrendingTopic",
"Category"
]

View File

@ -0,0 +1,306 @@
"""
SQLAlchemy models and Pydantic schemas for data structures
"""
from sqlalchemy import (
Column, Integer, String, Text, DateTime, Float, Boolean,
ForeignKey, JSON, Enum as SQLEnum
)
from sqlalchemy.orm import declarative_base, relationship  # declarative_base lives in sqlalchemy.orm as of SQLAlchemy 2.0
from datetime import datetime
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, List, Dict, Any
from enum import Enum
Base = declarative_base()
# Enums
class CategoryType(str, Enum):
"""Information category types"""
BREAKING = "breaking"
NEWS = "news"
EVENTS = "events"
ECONOMY = "economy"
FOOD = "food"
SOCIAL = "social"
TRAVEL = "travel"
PLACES = "places"
COMMUNITY = "community"
class ReliabilityLevel(str, Enum):
"""Source reliability levels"""
VERIFIED = "verified"
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
UNVERIFIED = "unverified"
# SQLAlchemy Models (Database Tables)
class Source(Base):
"""Data source information"""
__tablename__ = "sources"
id = Column(Integer, primary_key=True, index=True)
name = Column(String(255), unique=True, nullable=False)
url = Column(String(500))
source_type = Column(String(50)) # news, social_media, government, etc.
reliability_score = Column(Float, default=0.5)
is_active = Column(Boolean, default=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
# Relationships
information_items = relationship("InformationItem", back_populates="source")
class InformationItem(Base):
"""Individual piece of information collected"""
__tablename__ = "information_items"
id = Column(Integer, primary_key=True, index=True)
title = Column(String(500), nullable=False)
summary = Column(Text)
content = Column(Text)
category = Column(SQLEnum(CategoryType), nullable=False)
url = Column(String(1000))
image_url = Column(String(1000))
# Source information
source_id = Column(Integer, ForeignKey("sources.id"))
source_name = Column(String(255))
reliability_level = Column(SQLEnum(ReliabilityLevel), default=ReliabilityLevel.MEDIUM)
# Metadata
published_at = Column(DateTime)
collected_at = Column(DateTime, default=datetime.utcnow)
location = Column(String(255)) # Specific location in Nairobi
coordinates = Column(JSON) # {"lat": -1.286389, "lng": 36.817223}
# Processing
sentiment_score = Column(Float) # -1 to 1
importance_score = Column(Float) # 0 to 1
tags = Column(JSON) # List of tags
entities = Column(JSON) # Extracted entities (people, places, organizations)
# Flags
is_verified = Column(Boolean, default=False)
is_featured = Column(Boolean, default=False)
is_alert = Column(Boolean, default=False)
# Relationships
source = relationship("Source", back_populates="information_items")
    # Table arguments (no additional indexes defined yet)
    __table_args__ = (
        {'extend_existing': True},
    )
class Alert(Base):
"""High-priority alerts and notifications"""
__tablename__ = "alerts"
id = Column(Integer, primary_key=True, index=True)
title = Column(String(500), nullable=False)
message = Column(Text, nullable=False)
alert_type = Column(String(50)) # traffic, weather, security, utility, etc.
severity = Column(String(20)) # low, medium, high, critical
area_affected = Column(String(255))
coordinates = Column(JSON)
source_name = Column(String(255))
url = Column(String(1000))
created_at = Column(DateTime, default=datetime.utcnow)
expires_at = Column(DateTime)
is_active = Column(Boolean, default=True)
    extra_data = Column("metadata", JSON)  # "metadata" is reserved by SQLAlchemy's Declarative API; attribute renamed, column name kept
class TrendingTopic(Base):
"""Trending topics and hashtags"""
__tablename__ = "trending_topics"
id = Column(Integer, primary_key=True, index=True)
topic = Column(String(255), nullable=False)
platform = Column(String(50)) # twitter, instagram, tiktok, etc.
mention_count = Column(Integer, default=0)
sentiment_score = Column(Float)
first_seen = Column(DateTime, default=datetime.utcnow)
last_updated = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
related_content = Column(JSON) # Sample posts/content
    extra_data = Column("metadata", JSON)  # "metadata" is reserved by SQLAlchemy's Declarative API; attribute renamed, column name kept
class InformationBrief(Base):
"""Generated intelligence briefs"""
__tablename__ = "information_briefs"
id = Column(Integer, primary_key=True, index=True)
generated_at = Column(DateTime, default=datetime.utcnow)
period_start = Column(DateTime)
period_end = Column(DateTime)
# Brief sections (stored as JSON)
breaking_updates = Column(JSON)
city_life = Column(JSON)
culture_events = Column(JSON)
business_economy = Column(JSON)
food_nightlife = Column(JSON)
social_trends = Column(JSON)
travel_movement = Column(JSON)
new_places = Column(JSON)
community_stories = Column(JSON)
# Metadata
total_items = Column(Integer)
sources_count = Column(Integer)
# Export
markdown_content = Column(Text)
html_content = Column(Text)
# Pydantic Schemas (API Request/Response)
class SourceSchema(BaseModel):
"""Source schema for API"""
id: Optional[int] = None
name: str
url: Optional[str] = None
source_type: str
reliability_score: float = Field(ge=0, le=1)
is_active: bool = True
created_at: Optional[datetime] = None
class Config:
from_attributes = True
class InformationItemSchema(BaseModel):
"""Information item schema for API"""
id: Optional[int] = None
title: str
summary: Optional[str] = None
content: Optional[str] = None
category: CategoryType
url: Optional[str] = None
image_url: Optional[str] = None
source_name: str
reliability_level: ReliabilityLevel = ReliabilityLevel.MEDIUM
published_at: Optional[datetime] = None
collected_at: Optional[datetime] = None
location: Optional[str] = None
coordinates: Optional[Dict[str, float]] = None
sentiment_score: Optional[float] = Field(None, ge=-1, le=1)
importance_score: Optional[float] = Field(None, ge=0, le=1)
tags: Optional[List[str]] = []
entities: Optional[Dict[str, List[str]]] = {}
is_verified: bool = False
is_featured: bool = False
is_alert: bool = False
class Config:
from_attributes = True
class AlertSchema(BaseModel):
"""Alert schema for API"""
id: Optional[int] = None
title: str
message: str
alert_type: str
severity: str
area_affected: Optional[str] = None
coordinates: Optional[Dict[str, float]] = None
source_name: str
url: Optional[str] = None
created_at: Optional[datetime] = None
expires_at: Optional[datetime] = None
is_active: bool = True
    extra_data: Optional[Dict[str, Any]] = {}  # mirrors the renamed ORM attribute (stored in the "metadata" column)
class Config:
from_attributes = True
class TrendingTopicSchema(BaseModel):
"""Trending topic schema for API"""
id: Optional[int] = None
topic: str
platform: str
mention_count: int = 0
sentiment_score: Optional[float] = None
first_seen: Optional[datetime] = None
last_updated: Optional[datetime] = None
related_content: Optional[List[Dict[str, Any]]] = []
    extra_data: Optional[Dict[str, Any]] = {}  # mirrors the renamed ORM attribute (stored in the "metadata" column)
class Config:
from_attributes = True
class BriefSection(BaseModel):
"""Schema for a brief section"""
items: List[Dict[str, str]]
class InformationBriefSchema(BaseModel):
"""Information brief schema for API"""
id: Optional[int] = None
generated_at: datetime
period_start: datetime
period_end: datetime
breaking_updates: Optional[List[Dict[str, str]]] = []
city_life: Optional[List[Dict[str, str]]] = []
culture_events: Optional[List[Dict[str, str]]] = []
business_economy: Optional[List[Dict[str, str]]] = []
food_nightlife: Optional[List[Dict[str, str]]] = []
social_trends: Optional[Dict[str, Any]] = {}
travel_movement: Optional[Dict[str, Any]] = {}
new_places: Optional[List[Dict[str, str]]] = []
community_stories: Optional[List[Dict[str, str]]] = []
total_items: int
sources_count: int
markdown_content: Optional[str] = None
class Config:
from_attributes = True
class SearchQuery(BaseModel):
"""Search query parameters"""
q: str = Field(..., min_length=1)
category: Optional[CategoryType] = None
from_date: Optional[datetime] = None
to_date: Optional[datetime] = None
min_reliability: Optional[float] = Field(None, ge=0, le=1)
limit: int = Field(50, ge=1, le=500)
offset: int = Field(0, ge=0)
class CollectionStats(BaseModel):
"""Statistics about data collection"""
total_items: int
items_by_category: Dict[str, int]
items_by_source: Dict[str, int]
latest_collection: Optional[datetime]
active_alerts: int
trending_topics_count: int
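
Editorial sketch: creating one InformationItem and serialising it through its Pydantic schema; the record's values are made up for illustration.

from datetime import datetime

from app.database import SessionLocal
from app.models.data_models import (
    CategoryType, InformationItem, InformationItemSchema, ReliabilityLevel
)

db = SessionLocal()
try:
    item = InformationItem(
        title="Example item: road closure on Mombasa Road",
        summary="Illustrative record, not collected data",
        category=CategoryType.TRAVEL,
        source_name="manual",
        reliability_level=ReliabilityLevel.MEDIUM,
        collected_at=datetime.utcnow(),
        tags=["example"],
    )
    db.add(item)
    db.commit()
    db.refresh(item)
    # from_attributes=True lets the schema read the ORM object directly (Pydantic v2).
    print(InformationItemSchema.model_validate(item).model_dump())
finally:
    db.close()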

View File

@ -0,0 +1,6 @@
"""
Data processors and analysis modules
"""
from .data_processor import DataProcessor
__all__ = ["DataProcessor"]

View File

@ -0,0 +1,365 @@
"""
Data processing and brief generation
"""
import logging
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
from sqlalchemy import func
from app.models.data_models import (
InformationItem, InformationBrief, TrendingTopic,
Alert, CategoryType
)
from app.config import CATEGORIES
logger = logging.getLogger(__name__)
class DataProcessor:
"""
Processes collected data and generates intelligence briefs
"""
def __init__(self, db: Session):
"""
Initialize data processor
Args:
db: Database session
"""
self.db = db
def generate_brief(self, hours: int = 24) -> InformationBrief:
"""
Generate an intelligence brief for a time period
Args:
hours: Number of hours to include in the brief
Returns:
Generated InformationBrief
"""
logger.info(f"Generating intelligence brief for last {hours} hours")
period_end = datetime.utcnow()
period_start = period_end - timedelta(hours=hours)
# Get items from the period
items = self.db.query(InformationItem).filter(
InformationItem.collected_at >= period_start,
InformationItem.collected_at <= period_end
).all()
# Organize by category
breaking_updates = self._get_items_by_category(items, CategoryType.BREAKING)
city_life = self._get_items_by_category(items, CategoryType.NEWS)
culture_events = self._get_items_by_category(items, CategoryType.EVENTS)
business_economy = self._get_items_by_category(items, CategoryType.ECONOMY)
food_nightlife = self._get_items_by_category(items, CategoryType.FOOD)
new_places = self._get_items_by_category(items, CategoryType.PLACES)
community_stories = self._get_items_by_category(items, CategoryType.COMMUNITY)
# Get social media trends
social_trends = self._get_social_trends(period_start)
# Get travel/movement info
travel_movement = self._get_travel_info(items, period_start)
# Count unique sources
sources = set(item.source_name for item in items if item.source_name)
sources_count = len(sources)
# Generate markdown content
markdown = self._generate_markdown(
period_start,
period_end,
breaking_updates,
city_life,
culture_events,
business_economy,
food_nightlife,
social_trends,
travel_movement,
new_places,
community_stories
)
# Create brief
brief = InformationBrief(
generated_at=datetime.utcnow(),
period_start=period_start,
period_end=period_end,
breaking_updates=breaking_updates,
city_life=city_life,
culture_events=culture_events,
business_economy=business_economy,
food_nightlife=food_nightlife,
social_trends=social_trends,
travel_movement=travel_movement,
new_places=new_places,
community_stories=community_stories,
total_items=len(items),
sources_count=sources_count,
markdown_content=markdown
)
self.db.add(brief)
self.db.commit()
self.db.refresh(brief)
logger.info(f"Generated brief with {len(items)} items from {sources_count} sources")
return brief
def _get_items_by_category(
self,
items: List[InformationItem],
category: CategoryType,
limit: int = 10
) -> List[Dict[str, str]]:
"""
Get items for a specific category
Args:
items: List of all items
category: Category to filter by
limit: Maximum number of items
Returns:
List of item dictionaries
"""
category_items = [
item for item in items
if item.category == category
]
# Sort by importance/recency
category_items.sort(
key=lambda x: (
x.importance_score or 0,
x.collected_at
),
reverse=True
)
return [
{
'title': item.title,
'summary': item.summary or '',
'source': item.source_name or '',
'url': item.url or '',
'date': item.published_at.isoformat() if item.published_at else item.collected_at.isoformat()
}
for item in category_items[:limit]
]
def _get_social_trends(self, since: datetime) -> Dict[str, Any]:
"""
Get social media trends
Args:
since: Start date
Returns:
Dictionary with social trends
"""
# Get trending topics
topics = self.db.query(TrendingTopic).filter(
TrendingTopic.last_updated >= since
).order_by(
TrendingTopic.mention_count.desc()
).limit(10).all()
# Get top social posts
social_items = self.db.query(InformationItem).filter(
InformationItem.category == CategoryType.SOCIAL,
InformationItem.collected_at >= since
).order_by(
InformationItem.importance_score.desc()
).limit(5).all()
trending_hashtags = [
{
'topic': t.topic,
'platform': t.platform,
'mentions': t.mention_count
}
for t in topics
]
viral_content = [
{
'title': item.title,
'summary': item.summary or '',
'url': item.url or ''
}
for item in social_items
]
return {
'trending_hashtags': trending_hashtags,
'viral_content': viral_content
}
def _get_travel_info(
self,
items: List[InformationItem],
since: datetime
) -> Dict[str, Any]:
"""
Get travel and movement information
Args:
items: All items
since: Start date
Returns:
Dictionary with travel info
"""
travel_items = [
item for item in items
if item.category == CategoryType.TRAVEL
]
# Get active alerts related to travel
alerts = self.db.query(Alert).filter(
Alert.is_active == True,
Alert.alert_type.in_(['traffic', 'transport', 'road']),
Alert.created_at >= since
).all()
traffic_alerts = [
{
'title': alert.title,
'message': alert.message,
'severity': alert.severity,
'area': alert.area_affected or ''
}
for alert in alerts
]
transit_info = [
{
'title': item.title,
'summary': item.summary or '',
'source': item.source_name or ''
}
for item in travel_items[:5]
]
return {
'traffic_alerts': traffic_alerts,
'transit_information': transit_info
}
def _generate_markdown(
self,
start: datetime,
end: datetime,
breaking: List[Dict],
city_life: List[Dict],
culture: List[Dict],
economy: List[Dict],
food: List[Dict],
social: Dict,
travel: Dict,
places: List[Dict],
community: List[Dict]
) -> str:
"""
Generate markdown formatted brief
Returns:
Markdown string
"""
md = f"# Nairobi Intelligence Brief\n\n"
md += f"**Generated:** {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')}\n\n"
md += f"**Period:** {start.strftime('%Y-%m-%d %H:%M')} to {end.strftime('%Y-%m-%d %H:%M')}\n\n"
md += "---\n\n"
# Breaking Updates
if breaking:
md += "## 🚨 Breaking Updates\n\n"
for item in breaking:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# City Life & Alerts
if city_life:
md += "## 🏙️ City Life & Alerts\n\n"
for item in city_life:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# Culture & Events
if culture:
md += "## 🎭 Culture & Events\n\n"
for item in culture:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# Business & Economy
if economy:
md += "## 💼 Business & Economy\n\n"
for item in economy:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# Food & Nightlife
if food:
md += "## 🍽️ Food & Nightlife\n\n"
for item in food:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# Social Media Trends
if social.get('trending_hashtags') or social.get('viral_content'):
md += "## 📱 Social Media Trends\n\n"
if social.get('trending_hashtags'):
md += "### Trending Hashtags:\n"
for tag in social['trending_hashtags']:
md += f"- **{tag['topic']}** ({tag['platform']}) — {tag['mentions']} mentions\n"
md += "\n"
if social.get('viral_content'):
md += "### Viral Content:\n"
for content in social['viral_content']:
md += f"- [{content['title']}]({content['url']}) — {content['summary']}\n"
md += "\n"
# Travel & Movement
if travel.get('traffic_alerts') or travel.get('transit_information'):
md += "## 🚗 Travel & Movement\n\n"
if travel.get('traffic_alerts'):
md += "### Traffic Alerts:\n"
for alert in travel['traffic_alerts']:
md += f"- **{alert['title']}** ({alert['severity']}) — {alert['message']}\n"
md += "\n"
if travel.get('transit_information'):
md += "### Transit Information:\n"
for info in travel['transit_information']:
md += f"- {info['title']}{info['summary']}\n"
md += "\n"
# New Places / Reviews
if places:
md += "## 📍 New Places / Reviews\n\n"
for item in places:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
# Community Stories
if community:
md += "## 👥 Community Stories\n\n"
for item in community:
md += f"- **{item['title']}** — {item['summary']} — [{item['source']}]({item['url']})\n"
md += "\n"
md += "---\n\n"
md += "*End of brief.*\n"
return md
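
Editorial sketch: InformationBrief has an html_content column that generate_brief() leaves empty; one way to fill it is shown below, assuming the third-party "markdown" package, which is not in requirements.txt.

import markdown  # assumed extra dependency: pip install markdown

from app.database import SessionLocal
from app.processors import DataProcessor

db = SessionLocal()
try:
    brief = DataProcessor(db).generate_brief(hours=6)
    brief.html_content = markdown.markdown(brief.markdown_content)
    db.commit()
finally:
    db.close()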

View File

@ -0,0 +1,6 @@
"""
Task scheduler for automated data collection
"""
from .tasks import start_scheduler, stop_scheduler, run_all_collectors
__all__ = ["start_scheduler", "stop_scheduler", "run_all_collectors"]

View File

@ -0,0 +1,150 @@
"""
Scheduled tasks for data collection
"""
import logging
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from datetime import datetime
from app.database import SessionLocal
from app.collectors import (
NewsCollector,
SocialMediaCollector,
GovernmentCollector,
TourismCollector,
BusinessCollector
)
from app.processors import DataProcessor
from app.config import get_settings
logger = logging.getLogger(__name__)
settings = get_settings()
scheduler = BackgroundScheduler()
def run_all_collectors():
"""
Run all data collectors
This function is executed on a schedule
"""
logger.info("Starting scheduled data collection")
start_time = datetime.utcnow()
db = SessionLocal()
results = []
try:
# Run collectors based on feature flags
if settings.enable_news_collection:
logger.info("Running news collector...")
news_collector = NewsCollector(db, "all")
result = news_collector.run()
results.append(result)
if settings.enable_social_media_collection:
logger.info("Running social media collector...")
social_collector = SocialMediaCollector(db, "all")
result = social_collector.run()
results.append(result)
if settings.enable_government_collection:
logger.info("Running government collector...")
gov_collector = GovernmentCollector(db)
result = gov_collector.run()
results.append(result)
if settings.enable_tourism_collection:
logger.info("Running tourism collector...")
tourism_collector = TourismCollector(db)
result = tourism_collector.run()
results.append(result)
if settings.enable_business_collection:
logger.info("Running business collector...")
business_collector = BusinessCollector(db)
result = business_collector.run()
results.append(result)
# Calculate totals
total_items = sum(r.get('items_collected', 0) for r in results)
successful = sum(1 for r in results if r.get('success', False))
failed = len(results) - successful
elapsed = (datetime.utcnow() - start_time).total_seconds()
logger.info(
f"Collection completed: {total_items} items from {successful} sources "
f"in {elapsed:.2f}s ({failed} failed)"
)
except Exception as e:
logger.error(f"Error in scheduled collection: {e}")
finally:
db.close()
def generate_brief():
"""
Generate a new intelligence brief
This function is executed on a schedule
"""
logger.info("Generating intelligence brief")
db = SessionLocal()
try:
processor = DataProcessor(db)
brief = processor.generate_brief(hours=24)
logger.info(
f"Brief generated with {brief.total_items} items "
f"from {brief.sources_count} sources"
)
except Exception as e:
logger.error(f"Error generating brief: {e}")
finally:
db.close()
def start_scheduler():
"""
Start the background scheduler with all tasks
"""
logger.info("Starting task scheduler")
# Schedule data collection
scheduler.add_job(
func=run_all_collectors,
trigger=IntervalTrigger(seconds=settings.collection_interval_seconds),
id='collect_data',
name='Collect data from all sources',
replace_existing=True
)
# Schedule brief generation (every 6 hours)
scheduler.add_job(
func=generate_brief,
trigger=IntervalTrigger(hours=6),
id='generate_brief',
name='Generate intelligence brief',
replace_existing=True
)
# Start the scheduler
scheduler.start()
logger.info(
f"Scheduler started. Collection interval: {settings.collection_interval_seconds}s"
)
def stop_scheduler():
"""Stop the background scheduler"""
logger.info("Stopping task scheduler")
scheduler.shutdown()
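
Editorial sketch: adding a fixed-time daily brief alongside the interval jobs above, using APScheduler's CronTrigger; the 06:00 Africa/Nairobi schedule is illustrative.

from apscheduler.triggers.cron import CronTrigger

from app.scheduler.tasks import generate_brief, scheduler

scheduler.add_job(
    func=generate_brief,
    trigger=CronTrigger(hour=6, minute=0, timezone="Africa/Nairobi"),
    id="morning_brief",
    name="Generate morning intelligence brief",
    replace_existing=True,
)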

187
nairobi-info-collector/cli.py Executable file
View File

@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""
Command-line interface for Nairobi Information Collector
"""
import argparse
import logging
from datetime import datetime
from app.database import SessionLocal, init_db
from app.collectors import (
NewsCollector,
SocialMediaCollector,
GovernmentCollector,
TourismCollector,
BusinessCollector
)
from app.processors import DataProcessor
from app.scheduler.tasks import run_all_collectors
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def collect_news(args):
"""Collect news from all sources"""
logger.info("Collecting news...")
db = SessionLocal()
try:
collector = NewsCollector(db, args.source or "all")
result = collector.run()
print(f"✓ Collected {result['items_collected']} items in {result['elapsed_seconds']}s")
finally:
db.close()
def collect_social(args):
"""Collect social media data"""
logger.info("Collecting social media data...")
db = SessionLocal()
try:
collector = SocialMediaCollector(db, args.platform or "all")
result = collector.run()
print(f"✓ Collected {result['items_collected']} items in {result['elapsed_seconds']}s")
finally:
db.close()
def collect_government(args):
"""Collect government data"""
logger.info("Collecting government data...")
db = SessionLocal()
try:
collector = GovernmentCollector(db)
result = collector.run()
print(f"✓ Collected {result['items_collected']} items in {result['elapsed_seconds']}s")
finally:
db.close()
def collect_tourism(args):
"""Collect tourism data"""
logger.info("Collecting tourism data...")
db = SessionLocal()
try:
collector = TourismCollector(db)
result = collector.run()
print(f"✓ Collected {result['items_collected']} items in {result['elapsed_seconds']}s")
finally:
db.close()
def collect_business(args):
"""Collect business data"""
logger.info("Collecting business data...")
db = SessionLocal()
try:
collector = BusinessCollector(db)
result = collector.run()
print(f"✓ Collected {result['items_collected']} items in {result['elapsed_seconds']}s")
finally:
db.close()
def collect_all(args):
"""Collect from all sources"""
logger.info("Collecting from all sources...")
run_all_collectors()
print("✓ Collection completed")
def generate_brief(args):
"""Generate an intelligence brief"""
logger.info(f"Generating brief for last {args.hours} hours...")
db = SessionLocal()
try:
processor = DataProcessor(db)
brief = processor.generate_brief(hours=args.hours)
print(f"\n✓ Brief generated:")
print(f" - Period: {brief.period_start} to {brief.period_end}")
print(f" - Total items: {brief.total_items}")
print(f" - Sources: {brief.sources_count}")
if args.output:
with open(args.output, 'w') as f:
f.write(brief.markdown_content)
print(f" - Saved to: {args.output}")
else:
print("\n" + brief.markdown_content)
finally:
db.close()
def setup_database(args):
"""Initialize the database"""
logger.info("Initializing database...")
try:
init_db()
print("✓ Database initialized successfully")
except Exception as e:
print(f"✗ Database initialization failed: {e}")
def main():
"""Main CLI entry point"""
parser = argparse.ArgumentParser(
description='Nairobi Information Collector CLI'
)
subparsers = parser.add_subparsers(dest='command', help='Command to run')
# Collect commands
collect_parser = subparsers.add_parser('collect', help='Collect data from sources')
collect_subparsers = collect_parser.add_subparsers(dest='source_type')
# News
news_parser = collect_subparsers.add_parser('news', help='Collect news')
news_parser.add_argument('--source', help='Specific news source')
news_parser.set_defaults(func=collect_news)
# Social media
social_parser = collect_subparsers.add_parser('social', help='Collect social media')
social_parser.add_argument('--platform', help='Specific platform (twitter, instagram, etc.)')
social_parser.set_defaults(func=collect_social)
# Government
gov_parser = collect_subparsers.add_parser('government', help='Collect government data')
gov_parser.set_defaults(func=collect_government)
# Tourism
tourism_parser = collect_subparsers.add_parser('tourism', help='Collect tourism data')
tourism_parser.set_defaults(func=collect_tourism)
# Business
business_parser = collect_subparsers.add_parser('business', help='Collect business data')
business_parser.set_defaults(func=collect_business)
# All
all_parser = collect_subparsers.add_parser('all', help='Collect from all sources')
all_parser.set_defaults(func=collect_all)
# Brief generation
brief_parser = subparsers.add_parser('brief', help='Generate intelligence brief')
brief_parser.add_argument('--hours', type=int, default=24, help='Hours to include in brief')
brief_parser.add_argument('--output', help='Output file for markdown')
brief_parser.set_defaults(func=generate_brief)
# Database setup
db_parser = subparsers.add_parser('init-db', help='Initialize database')
db_parser.set_defaults(func=setup_database)
args = parser.parse_args()
if hasattr(args, 'func'):
args.func(args)
else:
parser.print_help()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,72 @@
version: '3.8'
services:
# PostgreSQL Database
db:
image: postgres:15-alpine
container_name: nairobi_db
environment:
POSTGRES_USER: nairobiuser
POSTGRES_PASSWORD: nairobipass
POSTGRES_DB: nairobi_info
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U nairobiuser"]
interval: 10s
timeout: 5s
retries: 5
# Redis Cache
redis:
image: redis:7-alpine
container_name: nairobi_redis
ports:
- "6379:6379"
volumes:
- redis_data:/data
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 5s
retries: 5
# Main Application
app:
build: .
container_name: nairobi_app
ports:
- "8000:8000"
environment:
- DATABASE_URL=postgresql://nairobiuser:nairobipass@db:5432/nairobi_info
- REDIS_URL=redis://redis:6379/0
- ENVIRONMENT=production
- DEBUG=False
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
volumes:
- ./logs:/app/logs
- ./.env:/app/.env
restart: unless-stopped
# Nginx Reverse Proxy (optional)
nginx:
image: nginx:alpine
container_name: nairobi_nginx
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
depends_on:
- app
restart: unless-stopped
volumes:
postgres_data:
redis_data:
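
Editorial sketch: after `docker compose up -d`, a small readiness probe against the root endpoint, using the requests library already pinned in requirements.txt.

import time

import requests

for _ in range(10):
    try:
        resp = requests.get("http://localhost:8000/", timeout=3)
        if resp.status_code == 200:
            print("App is up:", resp.json()["version"])
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(3)
else:
    print("App did not become ready in time")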

View File

@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""
Example usage of Nairobi Information Collector
This script demonstrates how to use the collector programmatically
"""
from app.database import SessionLocal, init_db
from app.collectors import NewsCollector
from app.processors import DataProcessor
from app.models.data_models import InformationItem, CategoryType
from datetime import datetime, timedelta
def example_1_collect_news():
"""Example 1: Collect news from all sources"""
print("=" * 60)
print("Example 1: Collecting News")
print("=" * 60)
db = SessionLocal()
try:
# Create news collector
collector = NewsCollector(db, "all")
# Run collection
result = collector.run()
print(f"\nCollection Results:")
print(f" - Items collected: {result['items_collected']}")
print(f" - Time taken: {result['elapsed_seconds']}s")
print(f" - Success: {result['success']}")
finally:
db.close()
def example_2_query_data():
"""Example 2: Query collected data"""
print("\n" + "=" * 60)
print("Example 2: Querying Data")
print("=" * 60)
db = SessionLocal()
try:
# Get total items
total = db.query(InformationItem).count()
print(f"\nTotal items in database: {total}")
# Get items by category
print("\nItems by category:")
for category in CategoryType:
count = db.query(InformationItem).filter(
InformationItem.category == category
).count()
print(f" - {category.value}: {count}")
# Get latest items
print("\nLatest 5 items:")
latest = db.query(InformationItem).order_by(
InformationItem.collected_at.desc()
).limit(5).all()
for item in latest:
print(f" - [{item.category.value}] {item.title[:60]}...")
finally:
db.close()
def example_3_generate_brief():
"""Example 3: Generate an intelligence brief"""
print("\n" + "=" * 60)
print("Example 3: Generating Intelligence Brief")
print("=" * 60)
db = SessionLocal()
try:
# Create processor
processor = DataProcessor(db)
# Generate brief for last 24 hours
brief = processor.generate_brief(hours=24)
print(f"\nBrief generated:")
print(f" - Period: {brief.period_start} to {brief.period_end}")
print(f" - Total items: {brief.total_items}")
print(f" - Sources: {brief.sources_count}")
# Save to file
output_file = f"brief_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
with open(output_file, 'w') as f:
f.write(brief.markdown_content)
print(f" - Saved to: {output_file}")
# Print preview
print("\nBrief preview:")
print("-" * 60)
lines = brief.markdown_content.split('\n')
print('\n'.join(lines[:20]))
print("...")
print("-" * 60)
finally:
db.close()
def example_4_search():
"""Example 4: Search for specific information"""
print("\n" + "=" * 60)
print("Example 4: Searching Information")
print("=" * 60)
db = SessionLocal()
try:
# Search for items containing "restaurant"
query = "restaurant"
results = db.query(InformationItem).filter(
(InformationItem.title.ilike(f"%{query}%")) |
(InformationItem.summary.ilike(f"%{query}%"))
).limit(5).all()
print(f"\nSearch results for '{query}':")
print(f"Found {len(results)} items\n")
for i, item in enumerate(results, 1):
print(f"{i}. {item.title}")
print(f" Category: {item.category.value}")
print(f" Source: {item.source_name}")
print(f" URL: {item.url}")
print()
finally:
db.close()
def example_5_api_usage():
"""Example 5: Using the REST API"""
print("\n" + "=" * 60)
print("Example 5: Using REST API")
print("=" * 60)
import requests
base_url = "http://localhost:8000/api/v1"
print("\nMake sure the API server is running!")
print("Run: python -m app.main\n")
try:
# Get stats
print("Getting statistics...")
response = requests.get(f"{base_url}/stats", timeout=5)
if response.status_code == 200:
stats = response.json()
print(f" - Total items: {stats['total_items']}")
print(f" - Active alerts: {stats['active_alerts']}")
else:
print(" ✗ API not available")
# Search
print("\nSearching via API...")
response = requests.get(
f"{base_url}/search",
params={"q": "nairobi", "limit": 3},
timeout=5
)
if response.status_code == 200:
results = response.json()
print(f" - Found {len(results)} results")
except requests.exceptions.ConnectionError:
print(" ✗ Could not connect to API server")
print(" Start the server with: python -m app.main")
except Exception as e:
print(f" ✗ Error: {e}")
def main():
"""Run all examples"""
print("\n")
print("" + "=" * 58 + "")
print("" + " " * 10 + "Nairobi Information Collector" + " " * 19 + "")
print("" + " " * 19 + "Example Usage" + " " * 26 + "")
print("" + "=" * 58 + "")
print()
# Initialize database if needed
print("Initializing database...")
try:
init_db()
print("✓ Database ready\n")
    except Exception as e:
        print(f"⚠ Database initialization skipped: {e}\n")
# Run examples
try:
# Only run data query example if we have data
db = SessionLocal()
item_count = db.query(InformationItem).count()
db.close()
if item_count > 0:
example_2_query_data()
example_3_generate_brief()
example_4_search()
else:
print("\nNo data in database. Running collection first...\n")
example_1_collect_news()
example_2_query_data()
# API example (may fail if server not running)
example_5_api_usage()
except KeyboardInterrupt:
print("\n\nExamples interrupted by user")
except Exception as e:
print(f"\n\nError running examples: {e}")
print("\n" + "=" * 60)
print("Examples completed!")
print("=" * 60)
print("\nFor more information, see:")
print(" - README.md")
print(" - QUICKSTART.md")
print(" - API docs: http://localhost:8000/docs")
print()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,79 @@
# Web Framework
fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3
pydantic-settings==2.1.0
# Database
sqlalchemy==2.0.25
alembic==1.13.1
psycopg2-binary==2.9.9
asyncpg==0.29.0
# Web Scraping
beautifulsoup4==4.12.3
requests==2.31.0
httpx==0.26.0
scrapy==2.11.0
selenium==4.16.0
lxml==5.1.0
# Social Media APIs
tweepy==4.14.0
instagrapi==2.0.0
tiktok-api==6.3.1
# Data Processing
pandas==2.1.4
numpy==1.26.3
# NLP & Text Processing
openai==1.7.2
transformers==4.36.2
spacy==3.7.2
nltk==3.8.1
# Scheduling
apscheduler==3.10.4
celery==5.3.4
redis==5.0.1
# Caching
aiocache==0.12.2
diskcache==5.6.3
# Configuration
python-dotenv==1.0.0
# HTTP & API
aiohttp==3.9.1
tenacity==8.2.3
# Date & Time
python-dateutil==2.8.2
pytz==2023.3.post1
# Utilities
loguru==0.7.2
python-multipart==0.0.6
email-validator==2.1.0
# Testing
pytest==7.4.4
pytest-asyncio==0.23.3
pytest-cov==4.1.0
httpx==0.26.0
# Development
black==23.12.1
flake8==7.0.0
mypy==1.8.0
pre-commit==3.6.0
# Monitoring
prometheus-client==0.19.0
sentry-sdk==1.39.2
# Security
cryptography==41.0.7
python-jose[cryptography]==3.3.0

109
nairobi-info-collector/setup.sh Executable file
View File

@ -0,0 +1,109 @@
#!/bin/bash
# Setup script for Nairobi Information Collector
# This script automates the initial setup process
set -e # Exit on error
echo "=================================="
echo "Nairobi Information Collector"
echo "Setup Script"
echo "=================================="
echo ""
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Check Python version
echo -n "Checking Python version... "
if command -v python3 &> /dev/null; then
PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
REQUIRED_VERSION="3.9"
if [ "$(printf '%s\n' "$REQUIRED_VERSION" "$PYTHON_VERSION" | sort -V | head -n1)" = "$REQUIRED_VERSION" ]; then
echo -e "${GREEN}✓ Python $PYTHON_VERSION${NC}"
else
echo -e "${RED}✗ Python 3.9+ required (found $PYTHON_VERSION)${NC}"
exit 1
fi
else
echo -e "${RED}✗ Python 3 not found${NC}"
exit 1
fi
# Create logs directory
echo -n "Creating logs directory... "
mkdir -p logs
echo -e "${GREEN}${NC}"
# Create virtual environment
if [ ! -d "venv" ]; then
echo -n "Creating virtual environment... "
python3 -m venv venv
echo -e "${GREEN}${NC}"
else
echo -e "${YELLOW}Virtual environment already exists${NC}"
fi
# Activate virtual environment
echo "Activating virtual environment..."
source venv/bin/activate
# Upgrade pip
echo -n "Upgrading pip... "
pip install --upgrade pip > /dev/null 2>&1
echo -e "${GREEN}${NC}"
# Install dependencies
echo "Installing dependencies..."
pip install -r requirements.txt
# Download spaCy model
echo -n "Downloading NLP model... "
python -m spacy download en_core_web_sm > /dev/null 2>&1
echo -e "${GREEN}${NC}"
# Create .env file if it doesn't exist
if [ ! -f ".env" ]; then
echo -n "Creating .env file... "
cp .env.example .env
echo -e "${GREEN}${NC}"
echo -e "${YELLOW}⚠ Please edit .env file with your API keys${NC}"
else
echo -e "${YELLOW}.env file already exists${NC}"
fi
# Initialize database
echo -n "Initializing database... "
python cli.py init-db > /dev/null 2>&1
echo -e "${GREEN}${NC}"
# Make CLI executable
chmod +x cli.py
echo ""
echo "=================================="
echo -e "${GREEN}Setup completed successfully!${NC}"
echo "=================================="
echo ""
echo "Next steps:"
echo "1. Edit .env file with your API keys:"
echo " nano .env"
echo ""
echo "2. Activate virtual environment:"
echo " source venv/bin/activate"
echo ""
echo "3. Start the application:"
echo " python -m app.main"
echo ""
echo "4. Or run a manual collection:"
echo " python cli.py collect all"
echo ""
echo "5. Access the API:"
echo " http://localhost:8000/docs"
echo ""
echo "For more information, see QUICKSTART.md"
echo ""