mirror of
https://git.datalinker.icu/deepseek-ai/DeepSeek-V3.git
synced 2026-05-26 23:59:09 +08:00
Comprehensive intelligence retrieval system for collecting and aggregating information about Nairobi, Kenya from multiple sources. Features: - Multi-source data collection (news, social media, government, tourism, business) - RESTful API with FastAPI - Automated scheduling for continuous data collection - Intelligence brief generation - Real-time trending topics tracking - Alert system for important updates - Web scraping with rate limiting and caching - Social media integration (Twitter, Instagram) - NLP-powered categorization and processing - Docker support for easy deployment - CLI for manual operations Components: - Data models with SQLAlchemy - Base collector class with extensible architecture - Source-specific collectors (news, social, government, tourism, business) - Data processor for brief generation - Scheduler for automated collection - Comprehensive API endpoints - CLI interface for manual control Documentation: - Complete README with setup instructions - Quick start guide - Example usage scripts - Docker Compose configuration - Environment configuration templates
188 lines
5.6 KiB
Python
Executable File
188 lines
5.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Command-line interface for Nairobi Information Collector
|
|
"""
|
|
import argparse
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from app.database import SessionLocal, init_db
|
|
from app.collectors import (
|
|
NewsCollector,
|
|
SocialMediaCollector,
|
|
GovernmentCollector,
|
|
TourismCollector,
|
|
BusinessCollector
|
|
)
|
|
from app.processors import DataProcessor
|
|
from app.scheduler.tasks import run_all_collectors
|
|
|
|
# Configure logging
# Root logger emits INFO and above with timestamp, logger name and level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by every command handler in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def collect_news(args):
    """Run the news collector and print a one-line summary.

    ``args.source`` is handed to ``NewsCollector`` as its second argument;
    a falsy value is replaced by ``"all"``. The database session opened
    here is closed on every exit path.
    """
    logger.info("Collecting news...")
    session = SessionLocal()
    try:
        source_name = args.source or "all"
        outcome = NewsCollector(session, source_name).run()
        count = outcome['items_collected']
        elapsed = outcome['elapsed_seconds']
        print(f"✓ Collected {count} items in {elapsed}s")
    finally:
        session.close()
|
|
|
|
|
|
def collect_social(args):
    """Run the social media collector and print a one-line summary.

    ``args.platform`` is handed to ``SocialMediaCollector`` as its second
    argument; a falsy value is replaced by ``"all"``. The database session
    opened here is closed on every exit path.
    """
    logger.info("Collecting social media data...")
    session = SessionLocal()
    try:
        platform_name = args.platform or "all"
        outcome = SocialMediaCollector(session, platform_name).run()
        count = outcome['items_collected']
        elapsed = outcome['elapsed_seconds']
        print(f"✓ Collected {count} items in {elapsed}s")
    finally:
        session.close()
|
|
|
|
|
|
def collect_government(args):
    """Run the government collector and print a one-line summary.

    ``args`` is accepted for a uniform handler signature and is not read.
    The database session opened here is closed on every exit path.
    """
    logger.info("Collecting government data...")
    session = SessionLocal()
    try:
        outcome = GovernmentCollector(session).run()
        count = outcome['items_collected']
        elapsed = outcome['elapsed_seconds']
        print(f"✓ Collected {count} items in {elapsed}s")
    finally:
        session.close()
|
|
|
|
|
|
def collect_tourism(args):
    """Run the tourism collector and print a one-line summary.

    ``args`` is accepted for a uniform handler signature and is not read.
    The database session opened here is closed on every exit path.
    """
    logger.info("Collecting tourism data...")
    session = SessionLocal()
    try:
        outcome = TourismCollector(session).run()
        count = outcome['items_collected']
        elapsed = outcome['elapsed_seconds']
        print(f"✓ Collected {count} items in {elapsed}s")
    finally:
        session.close()
|
|
|
|
|
|
def collect_business(args):
    """Run the business collector and print a one-line summary.

    ``args`` is accepted for a uniform handler signature and is not read.
    The database session opened here is closed on every exit path.
    """
    logger.info("Collecting business data...")
    session = SessionLocal()
    try:
        outcome = BusinessCollector(session).run()
        count = outcome['items_collected']
        elapsed = outcome['elapsed_seconds']
        print(f"✓ Collected {count} items in {elapsed}s")
    finally:
        session.close()
|
|
|
|
|
|
def collect_all(args):
    """Trigger every collector via ``run_all_collectors`` and confirm completion.

    ``args`` is accepted for a uniform handler signature and is not read.
    """
    logger.info("Collecting from all sources...")

    # Delegates the whole run to the scheduler task; its return value is ignored.
    run_all_collectors()

    print("✓ Collection completed")
|
|
|
|
|
|
def generate_brief(args):
    """Generate an intelligence brief and print or save its markdown.

    Args:
        args: Parsed CLI arguments. ``args.hours`` is forwarded to
            ``DataProcessor.generate_brief(hours=...)``; ``args.output`` is
            an optional path for the markdown output.

    The summary (period, total items, source count) is always printed.
    When ``args.output`` is set, the brief's ``markdown_content`` is written
    to that file; otherwise it is printed to stdout. The database session
    is closed on every exit path.
    """
    logger.info("Generating brief for last %s hours...", args.hours)
    db = SessionLocal()
    try:
        processor = DataProcessor(db)
        brief = processor.generate_brief(hours=args.hours)

        print("\n✓ Brief generated:")
        print(f" - Period: {brief.period_start} to {brief.period_end}")
        print(f" - Total items: {brief.total_items}")
        print(f" - Sources: {brief.sources_count}")

        if args.output:
            # Explicit UTF-8: the locale default encoding may be unable to
            # represent non-ASCII characters in the markdown.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(brief.markdown_content)
            print(f" - Saved to: {args.output}")
        else:
            print("\n" + brief.markdown_content)
    finally:
        db.close()
|
|
|
|
|
|
def setup_database(args):
    """Initialize the database and report the outcome.

    Args:
        args: Parsed CLI arguments; not read by this command.

    Raises:
        SystemExit: With status 1 when ``init_db()`` raises, so the shell
            (or a calling script) sees a non-zero exit code instead of a
            silent success.
    """
    logger.info("Initializing database...")
    try:
        init_db()
    except Exception as e:
        # Top-level CLI boundary: keep the traceback in the log, show the
        # short message to the user, and fail the process.
        logger.exception("Database initialization failed")
        print(f"✗ Database initialization failed: {e}")
        raise SystemExit(1) from e
    else:
        print("✓ Database initialized successfully")
|
|
|
|
|
|
def main():
    """Parse the command line and dispatch to the matching handler.

    Commands: ``collect <news|social|government|tourism|business|all>``,
    ``brief`` and ``init-db``. Each leaf parser stores its handler under
    ``func``; when no handler was selected the top-level help is printed.
    """
    parser = argparse.ArgumentParser(
        description='Nairobi Information Collector CLI'
    )
    commands = parser.add_subparsers(dest='command', help='Command to run')

    # --- collect ---------------------------------------------------------
    collect_cmd = commands.add_parser('collect', help='Collect data from sources')
    sources = collect_cmd.add_subparsers(dest='source_type')

    # Collectors that take an extra option are registered explicitly.
    news_cmd = sources.add_parser('news', help='Collect news')
    news_cmd.add_argument('--source', help='Specific news source')
    news_cmd.set_defaults(func=collect_news)

    social_cmd = sources.add_parser('social', help='Collect social media')
    social_cmd.add_argument(
        '--platform',
        help='Specific platform (twitter, instagram, etc.)',
    )
    social_cmd.set_defaults(func=collect_social)

    # Option-less collectors, kept in the order they appear in --help.
    simple_collectors = (
        ('government', 'Collect government data', collect_government),
        ('tourism', 'Collect tourism data', collect_tourism),
        ('business', 'Collect business data', collect_business),
        ('all', 'Collect from all sources', collect_all),
    )
    for cmd_name, cmd_help, handler in simple_collectors:
        sources.add_parser(cmd_name, help=cmd_help).set_defaults(func=handler)

    # --- brief -----------------------------------------------------------
    brief_cmd = commands.add_parser('brief', help='Generate intelligence brief')
    brief_cmd.add_argument(
        '--hours', type=int, default=24, help='Hours to include in brief'
    )
    brief_cmd.add_argument('--output', help='Output file for markdown')
    brief_cmd.set_defaults(func=generate_brief)

    # --- init-db ---------------------------------------------------------
    init_cmd = commands.add_parser('init-db', help='Initialize database')
    init_cmd.set_defaults(func=setup_database)

    args = parser.parse_args()

    # No leaf command chosen (e.g. bare invocation or bare "collect").
    if not hasattr(args, 'func'):
        parser.print_help()
        return

    args.func(args)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|