GT AI OS Community v2.0.33 - Add NVIDIA NIM and Nemotron agents
- Updated python_coding_microproject.csv to use NVIDIA NIM Kimi K2 - Updated kali_linux_shell_simulator.csv to use NVIDIA NIM Kimi K2 - Made more general-purpose (flexible targets, expanded tools) - Added nemotron-mini-agent.csv for fast local inference via Ollama - Added nemotron-agent.csv for advanced reasoning via Ollama - Added wiki page: Projects for NVIDIA NIMs and Nemotron
This commit is contained in:
432
scripts/migrations/021_add_nvidia_models.sql
Normal file
432
scripts/migrations/021_add_nvidia_models.sql
Normal file
@@ -0,0 +1,432 @@
|
||||
-- Migration: 021_add_nvidia_models.sql
-- Description: Add NVIDIA NIM models to model_configs table
-- Date: 2025-12-08
-- Issue: #266 - Add NVIDIA API endpoint support
-- Reference: https://build.nvidia.com/models

-- NVIDIA NIM Models (build.nvidia.com)
-- Pricing: Estimated based on third-party providers and model size (Dec 2025)
-- Models selected: SOTA reasoning, coding, and general-purpose LLMs
--
-- Upsert: re-running this migration refreshes every mutable column of an
-- existing row (keyed on model_id) so pricing/description updates propagate.
INSERT INTO model_configs (
    model_id,
    name,
    version,
    provider,
    model_type,
    endpoint,
    context_window,
    max_tokens,
    cost_per_million_input,
    cost_per_million_output,
    capabilities,
    is_active,
    description,
    created_at,
    updated_at,
    request_count,
    error_count,
    success_rate,
    avg_latency_ms,
    health_status
)
VALUES
-- ==========================================
-- NVIDIA Llama Nemotron Family (Flagship)
-- ==========================================

-- Llama 3.3 Nemotron Super 49B v1 - Latest flagship reasoning model
(
    'nvidia/llama-3.3-nemotron-super-49b-v1',
    'NVIDIA Llama 3.3 Nemotron Super 49B',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    131072,
    8192,
    0.5,
    1.5,
    '{"streaming": true, "function_calling": true, "reasoning": true}',
    true,
    'NVIDIA flagship reasoning model - best accuracy/throughput on single GPU',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- Llama 3.1 Nemotron Ultra 253B - Maximum accuracy
(
    'nvidia/llama-3.1-nemotron-ultra-253b-v1',
    'NVIDIA Llama 3.1 Nemotron Ultra 253B',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    131072,
    8192,
    0.6,
    1.8,
    '{"streaming": true, "function_calling": true, "reasoning": true}',
    true,
    'Maximum agentic accuracy for scientific reasoning, math, and coding',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- Nemotron Nano 8B - Edge/PC deployment
(
    'nvidia/llama-3.1-nemotron-nano-8b-v1',
    'NVIDIA Llama 3.1 Nemotron Nano 8B',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    131072,
    8192,
    0.02,
    0.06,
    '{"streaming": true, "function_calling": true}',
    true,
    'Cost-effective model optimized for edge devices and low latency',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- Meta Llama 3.3 (via NVIDIA NIM)
-- ==========================================

-- Llama 3.3 70B Instruct - Latest Llama
(
    'nvidia/meta-llama-3.3-70b-instruct',
    'NVIDIA Meta Llama 3.3 70B Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    4096,
    0.13,
    0.4,
    '{"streaming": true, "function_calling": true}',
    true,
    'Latest Meta Llama 3.3 - excellent for instruction following',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- DeepSeek Models (via NVIDIA NIM)
-- ==========================================

-- DeepSeek V3 - Hybrid inference with Think/Non-Think modes
(
    'nvidia/deepseek-ai-deepseek-v3',
    'NVIDIA DeepSeek V3',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.5,
    1.5,
    '{"streaming": true, "function_calling": true, "reasoning": true}',
    true,
    'Hybrid LLM with Think/Non-Think modes, 128K context, strong tool use',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- DeepSeek R1 - Enhanced reasoning
(
    'nvidia/deepseek-ai-deepseek-r1',
    'NVIDIA DeepSeek R1',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.6,
    2.4,
    '{"streaming": true, "function_calling": true, "reasoning": true}',
    true,
    'Enhanced reasoning model - reduced hallucination, strong math/coding',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- Kimi K2 (Moonshot AI via NVIDIA NIM)
-- ==========================================

(
    'nvidia/moonshot-ai-kimi-k2-instruct',
    'NVIDIA Kimi K2 Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.4,
    1.2,
    '{"streaming": true, "function_calling": true, "reasoning": true}',
    true,
    'Long context window with enhanced reasoning capabilities',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- Mistral Models (via NVIDIA NIM)
-- ==========================================

-- Mistral Large 3 - State-of-the-art MoE
(
    'nvidia/mistralai-mistral-large-3-instruct',
    'NVIDIA Mistral Large 3 Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.8,
    2.4,
    '{"streaming": true, "function_calling": true}',
    true,
    'State-of-the-art general purpose MoE model',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- Qwen Models (via NVIDIA NIM)
-- ==========================================

-- Qwen 3 - Ultra-long context (131K with YaRN extension)
(
    'nvidia/qwen-qwen3-235b-a22b-fp8-instruct',
    'NVIDIA Qwen 3 235B Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    131072,
    8192,
    0.7,
    2.1,
    '{"streaming": true, "function_calling": true}',
    true,
    'Ultra-long context AI with strong multilingual support',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- Meta Llama 3.1 (via NVIDIA NIM)
-- ==========================================

-- Llama 3.1 405B - Largest open model
(
    'nvidia/meta-llama-3.1-405b-instruct',
    'NVIDIA Meta Llama 3.1 405B Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    4096,
    1.0,
    3.0,
    '{"streaming": true, "function_calling": true}',
    true,
    'Largest open-source LLM - exceptional quality across all tasks',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- Llama 3.1 70B
(
    'nvidia/meta-llama-3.1-70b-instruct',
    'NVIDIA Meta Llama 3.1 70B Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    4096,
    0.13,
    0.4,
    '{"streaming": true, "function_calling": true}',
    true,
    'Excellent balance of quality and speed',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- Llama 3.1 8B - Fast and efficient
(
    'nvidia/meta-llama-3.1-8b-instruct',
    'NVIDIA Meta Llama 3.1 8B Instruct',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    4096,
    0.02,
    0.06,
    '{"streaming": true, "function_calling": true}',
    true,
    'Fast and cost-effective for simpler tasks',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),

-- ==========================================
-- OpenAI GPT-OSS Models (via NVIDIA NIM)
-- Released August 2025 - Apache 2.0 License
-- ==========================================

-- GPT-OSS 120B via NVIDIA NIM - Production flagship, MoE architecture (117B params, 5.7B active)
(
    'nvidia/openai-gpt-oss-120b',
    'NVIDIA OpenAI GPT-OSS 120B',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.7,
    2.1,
    '{"streaming": true, "function_calling": true, "reasoning": true, "tool_use": true}',
    true,
    'OpenAI flagship open model via NVIDIA NIM - production-grade reasoning, fits single H100 GPU',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
),
-- GPT-OSS 20B via NVIDIA NIM - Lightweight MoE for edge/local (21B params, 4B active)
(
    'nvidia/openai-gpt-oss-20b',
    'NVIDIA OpenAI GPT-OSS 20B',
    '1.0',
    'nvidia',
    'llm',
    'https://integrate.api.nvidia.com/v1/chat/completions',
    128000,
    8192,
    0.15,
    0.45,
    '{"streaming": true, "function_calling": true, "reasoning": true, "tool_use": true}',
    true,
    'OpenAI lightweight open model via NVIDIA NIM - low latency, runs in 16GB VRAM',
    NOW(),
    NOW(),
    0,
    0,
    100.0,
    0,
    'unknown'
)

ON CONFLICT (model_id) DO UPDATE SET
    name = EXCLUDED.name,
    version = EXCLUDED.version,
    provider = EXCLUDED.provider,
    -- FIX: model_type was missing from the update list, so a re-run could not
    -- repair a drifted model_type on an existing row; keep the upsert complete.
    model_type = EXCLUDED.model_type,
    endpoint = EXCLUDED.endpoint,
    context_window = EXCLUDED.context_window,
    max_tokens = EXCLUDED.max_tokens,
    cost_per_million_input = EXCLUDED.cost_per_million_input,
    cost_per_million_output = EXCLUDED.cost_per_million_output,
    capabilities = EXCLUDED.capabilities,
    is_active = EXCLUDED.is_active,
    description = EXCLUDED.description,
    updated_at = NOW();
|
||||
|
||||
-- Assign NVIDIA models to all existing tenants with 1000 RPM rate limits
-- Note: model_config_id (UUID) is the foreign key, model_id kept for convenience
INSERT INTO tenant_model_configs (tenant_id, model_config_id, model_id, is_enabled, priority, rate_limits, created_at, updated_at)
SELECT
    t.id,
    m.id,        -- UUID foreign key (auto-generated in model_configs)
    m.model_id,  -- String identifier (kept for easier queries)
    true,
    5,           -- default priority for newly assigned providers
    '{"max_requests_per_hour": 1000, "max_tokens_per_request": 4000, "concurrent_requests": 5, "max_cost_per_hour": 10.0, "requests_per_minute": 1000, "tokens_per_minute": 100000, "max_concurrent": 10}'::json,
    NOW(),
    NOW()
FROM tenants t
CROSS JOIN model_configs m
WHERE m.provider = 'nvidia'
ON CONFLICT (tenant_id, model_config_id) DO UPDATE SET
    rate_limits = EXCLUDED.rate_limits,
    -- FIX: refresh the audit timestamp when rate limits are re-applied on a
    -- migration re-run; previously only rate_limits was updated, leaving
    -- updated_at stale.
    updated_at = NOW();
|
||||
|
||||
-- Emit a server-log notice so operators can confirm this migration ran.
DO LANGUAGE plpgsql $migration_021$
BEGIN
    RAISE NOTICE 'Migration 021: Added NVIDIA NIM models (Nemotron, Llama 3.3, DeepSeek, Kimi K2, Mistral, Qwen, OpenAI GPT-OSS) to model_configs and assigned to tenants';
END
$migration_021$;
|
||||
Reference in New Issue
Block a user