GT AI OS Community Edition v2.0.33
Security hardening release addressing CodeQL and Dependabot alerts:

- Fix stack trace exposure in error responses
- Add SSRF protection with DNS resolution checking
- Implement proper URL hostname validation (replaces substring matching)
- Add centralized path sanitization to prevent path traversal
- Fix ReDoS vulnerability in email validation regex
- Improve HTML sanitization in validation utilities
- Fix capability wildcard matching in auth utilities
- Update glob dependency to address CVE
- Add CodeQL suppression comments for verified false positives

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
585
apps/tenant-backend/app/services/dataset_sharing.py
Normal file
585
apps/tenant-backend/app/services/dataset_sharing.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Dataset Sharing Service for GT 2.0
|
||||
|
||||
Implements hierarchical dataset sharing with perfect tenant isolation.
|
||||
Enables secure data collaboration while maintaining ownership and access control.
|
||||
"""
|
||||
|
||||
import os
|
||||
import stat
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from uuid import uuid4
|
||||
|
||||
from app.models.access_group import AccessGroup, Resource
|
||||
from app.services.access_controller import AccessController
|
||||
from app.core.security import verify_capability_token
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SharingPermission(Enum):
    """Permission levels that can be granted on a shared dataset."""

    # Can view and search dataset
    READ = "read"
    # Can add documents
    WRITE = "write"
    # Can modify sharing settings
    ADMIN = "admin"
|
||||
|
||||
|
||||
@dataclass
class DatasetShare:
    """Sharing configuration record for a single dataset.

    Captures the owner, the access group the dataset is shared with, the
    team members and their individual permissions, and the share's lifetime
    (creation time, optional expiry, active flag).
    """

    id: str = field(default_factory=lambda: str(uuid4()))
    dataset_id: str = ""
    owner_id: str = ""
    access_group: AccessGroup = AccessGroup.INDIVIDUAL
    team_members: List[str] = field(default_factory=list)
    team_permissions: Dict[str, SharingPermission] = field(default_factory=dict)
    shared_at: datetime = field(default_factory=datetime.utcnow)
    expires_at: Optional[datetime] = None
    is_active: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the share into a plain dictionary suitable for storage.

        Enum members are reduced to their ``value`` and datetimes to ISO-8601
        strings; a missing expiry is stored as ``None``.
        """
        permission_values = {}
        for member, level in self.team_permissions.items():
            permission_values[member] = level.value

        expiry = None
        if self.expires_at:
            expiry = self.expires_at.isoformat()

        return dict(
            id=self.id,
            dataset_id=self.dataset_id,
            owner_id=self.owner_id,
            access_group=self.access_group.value,
            team_members=self.team_members,
            team_permissions=permission_values,
            shared_at=self.shared_at.isoformat(),
            expires_at=expiry,
            is_active=self.is_active,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DatasetShare":
        """Rebuild a share from the dictionary form produced by ``to_dict``.

        ``dataset_id``, ``owner_id``, ``access_group`` and ``shared_at`` are
        required keys; the remaining keys fall back to defaults when absent.
        """
        # Fields are read in declaration order so that the first missing or
        # malformed key is the one reported.
        share_id = data.get("id", str(uuid4()))
        dataset_id = data["dataset_id"]
        owner_id = data["owner_id"]
        group = AccessGroup(data["access_group"])
        members = data.get("team_members", [])

        permissions = {}
        for member, level in data.get("team_permissions", {}).items():
            permissions[member] = SharingPermission(level)

        shared_at = datetime.fromisoformat(data["shared_at"])

        if data.get("expires_at"):
            expiry = datetime.fromisoformat(data["expires_at"])
        else:
            expiry = None

        return cls(
            id=share_id,
            dataset_id=dataset_id,
            owner_id=owner_id,
            access_group=group,
            team_members=members,
            team_permissions=permissions,
            shared_at=shared_at,
            expires_at=expiry,
            is_active=data.get("is_active", True),
        )
|
||||
|
||||
|
||||
@dataclass
class DatasetInfo:
    """Descriptive metadata about a dataset, as returned by the sharing service."""

    # Identity and ownership
    id: str
    name: str
    description: str
    owner_id: str

    # Size counters
    document_count: int
    size_bytes: int

    # Timestamps
    created_at: datetime
    updated_at: datetime

    # Optional labels; every instance gets its own fresh list
    tags: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
class DatasetSharingService:
    """
    Service for hierarchical dataset sharing with capability-based access control.

    Features:
    - Individual, Team, and Organization level sharing
    - Granular permission management (read, write, admin)
    - Time-based expiration of shares
    - Perfect tenant isolation through file-based storage
    - Event emission for sharing activities

    Each share is persisted as one JSON file named ``<dataset_id>.json``
    inside ``/data/<tenant_domain>/dataset_sharing/shares``.
    """

    def __init__(self, tenant_domain: str, access_controller: AccessController):
        """Bind the service to a tenant and create its storage directories.

        Args:
            tenant_domain: Tenant identifier; used to build the storage path
                and compared against the ``tenant_id`` claim of tokens.
            access_controller: Controller used to update resource access and,
                when it exposes an ``event_bus``, to emit sharing events.
        """
        self.tenant_domain = tenant_domain
        self.access_controller = access_controller
        self.base_path = Path(f"/data/{tenant_domain}/dataset_sharing")
        self.shares_path = self.base_path / "shares"
        self.datasets_path = self.base_path / "datasets"

        # Ensure directories exist with proper permissions
        self._ensure_directories()

        logger.info(f"DatasetSharingService initialized for {tenant_domain}")

    def _ensure_directories(self):
        """Ensure sharing directories exist with proper permissions"""
        for path in [self.shares_path, self.datasets_path]:
            path.mkdir(parents=True, exist_ok=True)
            # Set permissions to 700 (owner only)
            os.chmod(path, stat.S_IRWXU)

    def _verify_token(self, capability_token: str) -> Dict[str, Any]:
        """Validate a capability token for this tenant.

        Returns:
            The decoded token data.

        Raises:
            PermissionError: If the token does not verify or its
                ``tenant_id`` differs from this service's tenant.
        """
        token_data = verify_capability_token(capability_token)
        if not token_data or token_data.get("tenant_id") != self.tenant_domain:
            raise PermissionError("Invalid capability token")
        return token_data

    @staticmethod
    def _is_expired(share: DatasetShare) -> bool:
        """Return True when the share has an expiry that lies in the past.

        Naive expiry timestamps are compared against naive UTC "now" (the
        behaviour the stored shares rely on); timezone-aware timestamps are
        compared against an aware "now" so the comparison cannot raise
        ``TypeError``.
        """
        expires_at = share.expires_at
        if expires_at is None:
            return False
        if expires_at.utcoffset() is not None:
            return datetime.now(expires_at.tzinfo) > expires_at
        return datetime.utcnow() > expires_at

    def _share_file(self, dataset_id: str) -> Path:
        """Return the JSON file path that stores the share for ``dataset_id``.

        Raises:
            ValueError: If the id contains a path separator or NUL byte and
                could therefore point outside the shares directory.
        """
        name = str(dataset_id)
        if any(ch in name for ch in ("/", "\\", "\x00")):
            raise ValueError(f"Invalid dataset id: {dataset_id!r}")
        return self.shares_path / f"{name}.json"

    async def share_dataset(
        self,
        dataset_id: str,
        owner_id: str,
        access_group: AccessGroup,
        team_members: Optional[List[str]] = None,
        team_permissions: Optional[Dict[str, SharingPermission]] = None,
        expires_at: Optional[datetime] = None,
        capability_token: str = ""
    ) -> DatasetShare:
        """
        Share a dataset with specified access group.

        Args:
            dataset_id: Dataset to share
            owner_id: Owner of the dataset
            access_group: Level of sharing (Individual, Team, Organization)
            team_members: List of team members (if Team access)
            team_permissions: Permissions for each team member
            expires_at: Optional expiration time
            capability_token: JWT capability token

        Returns:
            DatasetShare configuration

        Raises:
            PermissionError: If the token is invalid or ``owner_id`` does not
                own the dataset.
            ValueError: If team sharing is requested without team members, or
                ``dataset_id`` is not usable as a file name.
        """
        self._verify_token(capability_token)

        # Reject ids that could escape the shares directory before doing any work
        self._share_file(dataset_id)

        # Verify ownership
        dataset_resource = await self._load_dataset_resource(dataset_id)
        if not dataset_resource or dataset_resource.owner_id != owner_id:
            raise PermissionError("Only dataset owner can modify sharing")

        # Validate team members for team sharing
        if access_group == AccessGroup.TEAM:
            if not team_members:
                raise ValueError("Team members required for team sharing")

            # NOTE(review): invalid members are only logged, not rejected;
            # confirm whether they should be refused once the real user
            # store replaces the placeholder check.
            for member in team_members:
                if not await self._is_valid_tenant_user(member):
                    logger.warning(f"Invalid team member: {member}")

        # Copy the caller's containers: defaults are filled in below and must
        # not leak back into the arguments that were passed in.
        share = DatasetShare(
            dataset_id=dataset_id,
            owner_id=owner_id,
            access_group=access_group,
            team_members=list(team_members or []),
            team_permissions=dict(team_permissions or {}),
            expires_at=expires_at
        )

        # Set default permissions for team members
        if access_group == AccessGroup.TEAM:
            for member in share.team_members:
                share.team_permissions.setdefault(member, SharingPermission.READ)

        # Store sharing configuration
        await self._store_share(share)

        # Update dataset resource access group
        await self.access_controller.update_resource_access(
            owner_id, dataset_id, access_group, team_members
        )

        # Emit sharing event
        if hasattr(self.access_controller, 'event_bus'):
            await self.access_controller.event_bus.emit_event(
                "dataset.shared",
                owner_id,
                {
                    "dataset_id": dataset_id,
                    "access_group": access_group.value,
                    "team_members": team_members or [],
                    "expires_at": expires_at.isoformat() if expires_at else None
                }
            )

        logger.info(f"Dataset {dataset_id} shared as {access_group.value} by {owner_id}")
        return share

    async def get_dataset_sharing(
        self,
        dataset_id: str,
        user_id: str,
        capability_token: str
    ) -> Optional[DatasetShare]:
        """
        Get sharing configuration for a dataset.

        Args:
            dataset_id: Dataset ID
            user_id: Requesting user
            capability_token: JWT capability token

        Returns:
            DatasetShare if user has access, None otherwise

        Raises:
            PermissionError: If the capability token is invalid.
        """
        self._verify_token(capability_token)

        share = await self._load_share(dataset_id)
        if not share:
            return None

        # Owner can always see
        if share.owner_id == user_id:
            return share

        # Team member can see
        if share.access_group == AccessGroup.TEAM and user_id in share.team_members:
            return share

        # All tenant users can see organization shares
        if share.access_group == AccessGroup.ORGANIZATION:
            if await self._is_valid_tenant_user(user_id):
                return share

        return None

    async def check_dataset_access(
        self,
        dataset_id: str,
        user_id: str,
        permission: SharingPermission = SharingPermission.READ
    ) -> Tuple[bool, Optional[str]]:
        """
        Check if user has specified permission on dataset.

        Args:
            dataset_id: Dataset to check
            user_id: User requesting access
            permission: Required permission level

        Returns:
            Tuple of (allowed, reason)
        """
        share = await self._load_share(dataset_id)
        return await self._evaluate_access(share, user_id, permission)

    async def _evaluate_access(
        self,
        share: Optional[DatasetShare],
        user_id: str,
        permission: SharingPermission
    ) -> Tuple[bool, Optional[str]]:
        """Decide whether ``user_id`` holds ``permission`` on a loaded share.

        Split out of ``check_dataset_access`` so callers that already hold
        the share do not have to read it from disk a second time.
        """
        if not share or not share.is_active:
            return False, "Dataset not shared or sharing inactive"

        if self._is_expired(share):
            return False, "Dataset sharing has expired"

        # Owner has all permissions
        if share.owner_id == user_id:
            return True, "Owner access"

        if share.access_group == AccessGroup.INDIVIDUAL:
            return False, "Private dataset"

        if share.access_group == AccessGroup.TEAM:
            if user_id not in share.team_members:
                return False, "Not a team member"

            # Members without an explicit entry are treated as read-only
            user_permission = share.team_permissions.get(user_id, SharingPermission.READ)
            if self._has_permission(user_permission, permission):
                return True, f"Team member with {user_permission.value} permission"
            return False, f"Insufficient permission: has {user_permission.value}, needs {permission.value}"

        if share.access_group == AccessGroup.ORGANIZATION:
            # Organization sharing is typically read-only
            if permission != SharingPermission.READ:
                return False, "Organization access is read-only"
            if await self._is_valid_tenant_user(user_id):
                return True, "Organization-wide read access"
            # A read request from an unknown user is refused for who they
            # are, not for what they asked; report that accurately.
            return False, "Not a valid tenant user"

        return False, "Unknown access configuration"

    async def list_accessible_datasets(
        self,
        user_id: str,
        capability_token: str,
        include_owned: bool = True,
        include_shared: bool = True
    ) -> List[DatasetInfo]:
        """
        List datasets accessible to user.

        Args:
            user_id: User requesting list
            capability_token: JWT capability token
            include_owned: Include user's own datasets
            include_shared: Include datasets shared with user

        Returns:
            List of accessible datasets

        Raises:
            PermissionError: If the capability token is invalid.
        """
        self._verify_token(capability_token)

        accessible_datasets = []

        for share in await self._list_all_shares():
            # Skip inactive or expired shares
            if not share.is_active or self._is_expired(share):
                continue

            if share.owner_id == user_id:
                # Owned datasets are governed solely by include_owned; they
                # must not slip back in through the shared-access check.
                has_access = include_owned
            elif include_shared:
                has_access, _ = await self._evaluate_access(
                    share, user_id, SharingPermission.READ
                )
            else:
                has_access = False

            if has_access:
                dataset_info = await self._load_dataset_info(share.dataset_id)
                if dataset_info:
                    accessible_datasets.append(dataset_info)

        return accessible_datasets

    async def revoke_dataset_sharing(
        self,
        dataset_id: str,
        owner_id: str,
        capability_token: str
    ) -> bool:
        """
        Revoke dataset sharing (make it private).

        Args:
            dataset_id: Dataset to make private
            owner_id: Owner of the dataset
            capability_token: JWT capability token

        Returns:
            True if revoked successfully

        Raises:
            PermissionError: If the token is invalid, no share exists, or
                ``owner_id`` is not the share's owner.
        """
        self._verify_token(capability_token)

        # Verify ownership
        share = await self._load_share(dataset_id)
        if not share or share.owner_id != owner_id:
            raise PermissionError("Only dataset owner can revoke sharing")

        # Update sharing to individual (private)
        share.access_group = AccessGroup.INDIVIDUAL
        share.team_members = []
        share.team_permissions = {}
        share.is_active = False

        await self._store_share(share)

        # Update resource access
        await self.access_controller.update_resource_access(
            owner_id, dataset_id, AccessGroup.INDIVIDUAL, []
        )

        # Emit revocation event
        if hasattr(self.access_controller, 'event_bus'):
            await self.access_controller.event_bus.emit_event(
                "dataset.sharing_revoked",
                owner_id,
                {"dataset_id": dataset_id}
            )

        logger.info(f"Dataset {dataset_id} sharing revoked by {owner_id}")
        return True

    async def update_team_permissions(
        self,
        dataset_id: str,
        owner_id: str,
        user_id: str,
        permission: SharingPermission,
        capability_token: str
    ) -> bool:
        """
        Update team member permissions for a dataset.

        Args:
            dataset_id: Dataset ID
            owner_id: Owner of the dataset
            user_id: Team member to update
            permission: New permission level
            capability_token: JWT capability token

        Returns:
            True if updated successfully

        Raises:
            PermissionError: If the token is invalid, no share exists, or
                ``owner_id`` is not the share's owner.
            ValueError: If the dataset is not team-shared or ``user_id`` is
                not one of its team members.
        """
        self._verify_token(capability_token)

        # Load and verify sharing
        share = await self._load_share(dataset_id)
        if not share or share.owner_id != owner_id:
            raise PermissionError("Only dataset owner can update permissions")

        if share.access_group != AccessGroup.TEAM:
            raise ValueError("Can only update permissions for team-shared datasets")

        if user_id not in share.team_members:
            raise ValueError("User is not a team member")

        share.team_permissions[user_id] = permission
        await self._store_share(share)

        logger.info(f"Updated {user_id} permission to {permission.value} for dataset {dataset_id}")
        return True

    async def get_sharing_statistics(
        self,
        user_id: str,
        capability_token: str
    ) -> Dict[str, Any]:
        """
        Get sharing statistics for user.

        Args:
            user_id: User to get stats for
            capability_token: JWT capability token

        Returns:
            Statistics dictionary with the keys ``owned_datasets``,
            ``shared_with_me``, ``sharing_breakdown`` (keyed by
            ``AccessGroup`` member), ``total_team_members`` and
            ``expired_shares``.

        Raises:
            PermissionError: If the capability token is invalid.
        """
        self._verify_token(capability_token)

        stats = {
            "owned_datasets": 0,
            "shared_with_me": 0,
            "sharing_breakdown": {
                AccessGroup.INDIVIDUAL: 0,
                AccessGroup.TEAM: 0,
                AccessGroup.ORGANIZATION: 0
            },
            "total_team_members": 0,
            "expired_shares": 0
        }

        for share in await self._list_all_shares():
            if share.owner_id == user_id:
                # Count owned datasets
                stats["owned_datasets"] += 1
                stats["sharing_breakdown"][share.access_group] += 1
                stats["total_team_members"] += len(share.team_members)

                # Count expired shares (only among the user's own)
                if self._is_expired(share):
                    stats["expired_shares"] += 1

            elif user_id in share.team_members or share.access_group == AccessGroup.ORGANIZATION:
                # Count datasets shared with user that are still usable
                if share.is_active and not self._is_expired(share):
                    stats["shared_with_me"] += 1

        return stats

    def _has_permission(self, user_permission: SharingPermission, required: SharingPermission) -> bool:
        """Check if user permission satisfies required permission"""
        # Higher rank implies every lower permission as well
        permission_hierarchy = {
            SharingPermission.READ: 1,
            SharingPermission.WRITE: 2,
            SharingPermission.ADMIN: 3
        }

        return permission_hierarchy[user_permission] >= permission_hierarchy[required]

    async def _store_share(self, share: DatasetShare):
        """Store sharing configuration to file system.

        The JSON is written to a private temporary file in the same
        directory and then renamed over the target, so a reader never sees
        a half-written share and a failed write leaves the previous file
        intact.

        Raises:
            ValueError: If ``share.dataset_id`` is not usable as a file name.
        """
        share_file = self._share_file(share.dataset_id)
        owner_rw = stat.S_IRUSR | stat.S_IWUSR  # 600
        # Ends in ".tmp", so the "*.json" listing never picks it up
        tmp_file = share_file.with_name(f".{share_file.name}.{uuid4().hex}.tmp")

        # O_EXCL + mode 600: the file is never visible with wider permissions
        fd = os.open(tmp_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, owner_rw)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(share.to_dict(), f, indent=2)
            # The creation mode is filtered by the umask; pin it to exactly 600
            os.chmod(tmp_file, owner_rw)
            os.replace(tmp_file, share_file)
        except BaseException:
            # Best-effort cleanup of the temp file; the original error wins
            try:
                os.unlink(tmp_file)
            except OSError:
                pass
            raise

    async def _load_share(self, dataset_id: str) -> Optional[DatasetShare]:
        """Load sharing configuration from file system.

        Returns:
            The stored share, or None when the id is not a valid file name,
            no share file exists, or the file cannot be read or parsed (the
            latter is logged).
        """
        try:
            share_file = self._share_file(dataset_id)
        except ValueError:
            # An id that cannot name a share file cannot have a share
            logger.warning(f"Rejected invalid dataset id: {dataset_id!r}")
            return None

        try:
            with open(share_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            return DatasetShare.from_dict(data)
        except FileNotFoundError:
            return None
        except Exception as e:
            logger.error(f"Error loading share for dataset {dataset_id}: {e}")
            return None

    async def _list_all_shares(self) -> List[DatasetShare]:
        """List all sharing configurations.

        Files that cannot be read or parsed are logged and skipped.
        """
        shares = []

        if self.shares_path.exists():
            for share_file in self.shares_path.glob("*.json"):
                try:
                    with open(share_file, "r", encoding="utf-8") as f:
                        data = json.load(f)
                    shares.append(DatasetShare.from_dict(data))
                except Exception as e:
                    logger.error(f"Error loading share file {share_file}: {e}")

        return shares

    async def _load_dataset_resource(self, dataset_id: str) -> Optional[Resource]:
        """Load dataset resource (implementation would query storage)"""
        # Placeholder - would integrate with actual resource storage.
        # NOTE(review): the owner is hard-coded, so ownership checks built on
        # this only pass for "mock_owner" until real storage is wired in.
        return Resource(
            id=dataset_id,
            name=f"Dataset {dataset_id}",
            resource_type="dataset",
            owner_id="mock_owner",
            tenant_domain=self.tenant_domain,
            access_group=AccessGroup.INDIVIDUAL
        )

    async def _load_dataset_info(self, dataset_id: str) -> Optional[DatasetInfo]:
        """Load dataset information (implementation would query storage)"""
        # Placeholder - would integrate with actual dataset storage
        return DatasetInfo(
            id=dataset_id,
            name=f"Dataset {dataset_id}",
            description="Mock dataset for testing",
            owner_id="mock_owner",
            document_count=10,
            size_bytes=1024000,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
            tags=["test", "mock"]
        )

    async def _is_valid_tenant_user(self, user_id: str) -> bool:
        """Check if user is valid in tenant (implementation would query user store)"""
        # Placeholder - would integrate with actual user management.
        # Currently accepts any id containing "@" that ends in .com/.org/.edu.
        return "@" in user_id and user_id.endswith((".com", ".org", ".edu"))
|
||||
Reference in New Issue
Block a user