JK
JustKalm
Storage Infrastructure

Storage Architecture

Enterprise-grade object storage, CDN distribution, and file processing pipelines handling 50TB+ of fashion assets with sub-100ms global delivery.

52TB

Total Storage

285+

CDN Edge Locations

<95ms

P95 Latency

1.2M

Daily Uploads

S3 Architecture

Multi-region S3 deployment with intelligent tiering and cross-region replication for durability and low-latency access.

Bucket Structure

justkalm-assets-prod/
├── products/
│   ├── images/
│   │   ├── original/      # Full-res uploads
│   │   ├── processed/     # Optimized versions
│   │   └── thumbnails/    # CDN-ready thumbs
│   └── documents/
│       ├── certificates/  # Authenticity docs
│       └── reports/       # Analysis PDFs
├── brands/
│   ├── logos/
│   └── media/
└── user-content/
    ├── uploads/
    └── exports/

S3 Configuration

# storage/config.py
from dataclasses import dataclass
from enum import Enum

# S3 storage classes used across the storage stack. Member values mirror
# the exact strings the S3 API expects in the StorageClass parameter.
StorageClass = Enum(
    "StorageClass",
    {
        "STANDARD": "STANDARD",
        "INTELLIGENT_TIERING": "INTELLIGENT_TIERING",
        "GLACIER_IR": "GLACIER_IR",  # Glacier Instant Retrieval
        "DEEP_ARCHIVE": "DEEP_ARCHIVE",
    },
)

@dataclass
class BucketConfig:
    """Declarative configuration for a single S3 bucket.

    Attributes:
        name: Bucket name (also used to derive the replica bucket ARN).
        region: AWS region the bucket lives in.
        versioning: Enable object versioning (required for replication).
        encryption: Server-side encryption algorithm ("AES256" or "aws:kms").
        replication: Whether cross-region replication should be applied.
        account_id: AWS account id embedded in the replication role ARN.
            Defaults to the literal "ACCOUNT" placeholder for backward
            compatibility with the previous hard-coded ARN.
    """

    name: str
    region: str
    versioning: bool = True
    encryption: str = "AES256"
    replication: bool = True
    account_id: str = "ACCOUNT"

    @property
    def replication_rules(self) -> dict:
        """S3 replication configuration targeting ``<name>-replica``.

        NOTE: returned regardless of the ``replication`` flag; callers
        should check that flag before applying the rules.
        """
        return {
            # Was a constant f-string with no placeholder; the account id
            # is now parameterized via ``account_id``.
            "Role": f"arn:aws:iam::{self.account_id}:role/s3-replication",
            "Rules": [{
                "ID": "CrossRegionReplication",
                "Status": "Enabled",
                "Priority": 1,
                # Replicating delete markers would propagate deletions to
                # the replica bucket; kept disabled for safety.
                "DeleteMarkerReplication": {"Status": "Disabled"},
                "Filter": {"Prefix": ""},  # empty prefix = every object
                "Destination": {
                    "Bucket": f"arn:aws:s3:::{self.name}-replica",
                    # Replication Time Control: replicate within 15 minutes
                    # and emit metrics on the same threshold.
                    "ReplicationTime": {"Status": "Enabled", "Time": {"Minutes": 15}},
                    "Metrics": {"Status": "Enabled", "EventThreshold": {"Minutes": 15}},
                    "StorageClass": "STANDARD_IA"
                }
            }]
        }

# Production bucket registry, keyed by logical role.
_assets_bucket = BucketConfig(
    name="justkalm-assets-prod",
    region="us-east-1",
    versioning=True,
    encryption="aws:kms",
    replication=True,
)

# The backup bucket already lives in a second region, so it is not
# replicated again.
_backups_bucket = BucketConfig(
    name="justkalm-backups-prod",
    region="us-west-2",
    versioning=True,
    encryption="aws:kms",
    replication=False,
)

PROD_BUCKETS = {
    "assets": _assets_bucket,
    "backups": _backups_bucket,
}

Storage Service

# storage/service.py
import hashlib
from datetime import datetime, timezone
from typing import BinaryIO, Optional

import boto3
from boto3.s3.transfer import TransferConfig
from botocore.config import Config

from .config import PROD_BUCKETS, StorageClass

class StorageService:
    """
    Enterprise storage service with presigned URLs,
    multipart uploads, and intelligent tiering.

    Wraps a boto3 S3 client configured for SigV4 signing and adaptive
    retries. NOTE(review): ``upload_file`` is declared async but performs
    blocking boto3 calls; run it in an executor when on a busy event loop.
    """

    def __init__(self):
        # SigV4 is required for KMS-encrypted objects; adaptive retry
        # mode backs off automatically under throttling.
        self.s3 = boto3.client(
            's3',
            config=Config(
                signature_version='s3v4',
                retries={'max_attempts': 3, 'mode': 'adaptive'}
            )
        )
        # Switch to multipart above 8MB, with up to 10 concurrent parts.
        # Explicit TransferConfig import replaces the previous
        # boto3.s3.transfer attribute access, which only works if boto3
        # has lazily loaded that submodule.
        self.transfer_config = TransferConfig(
            multipart_threshold=8 * 1024 * 1024,  # 8MB
            max_concurrency=10,
            multipart_chunksize=8 * 1024 * 1024,
            use_threads=True
        )

    async def _find_by_hash(self, content_hash: str) -> Optional[str]:
        """Return the key of an existing object with this content hash, or None.

        NOTE(review): this helper was called but never defined in the
        original code, so every upload raised AttributeError. S3 offers no
        server-side metadata index, so a real implementation needs an
        external hash->key index (e.g. DynamoDB). Until one exists we
        report "not found" so uploads always proceed.
        """
        return None

    async def upload_file(
        self,
        file: BinaryIO,
        key: str,
        content_type: str,
        storage_class: StorageClass = StorageClass.INTELLIGENT_TIERING,
        metadata: Optional[dict] = None
    ) -> dict:
        """Upload a file with content-based deduplication.

        Args:
            file: Seekable binary stream positioned at the start.
            key: Destination object key.
            content_type: MIME type stored as the object's Content-Type.
            storage_class: Target S3 storage class.
            metadata: Extra user metadata merged into the object metadata.

        Returns:
            ``{"key": <existing key>, "deduplicated": True}`` when identical
            content already exists; otherwise a dict with ``key``, ``bucket``,
            ``hash``, ``url`` and ``cdn_url``.
        """
        # Hash the full content for deduplication. NOTE: reads the whole
        # file into memory — fine under the 50MB browser-upload cap,
        # revisit (chunked hashing) for larger server-side uploads.
        content = file.read()
        content_hash = hashlib.sha256(content).hexdigest()
        file.seek(0)  # rewind so upload_fileobj re-reads from the start

        # Skip the upload entirely if identical content already exists.
        existing = await self._find_by_hash(content_hash)
        if existing:
            return {"key": existing, "deduplicated": True}

        extra_args = {
            "ContentType": content_type,
            "StorageClass": storage_class.value,
            "ServerSideEncryption": "aws:kms",
            "Metadata": {
                "content-hash": content_hash,
                # Timezone-aware UTC timestamp. The original called
                # datetime.utcnow() without importing datetime (NameError
                # at runtime); utcnow() is also deprecated since 3.12.
                "uploaded-at": datetime.now(timezone.utc).isoformat(),
                **(metadata or {})
            }
        }

        # Blocking call: boto3 switches to multipart automatically above
        # the configured 8MB threshold.
        bucket = PROD_BUCKETS["assets"].name
        self.s3.upload_fileobj(
            file, bucket, key,
            Config=self.transfer_config,
            ExtraArgs=extra_args
        )

        return {
            "key": key,
            "bucket": bucket,
            "hash": content_hash,
            "url": f"https://{bucket}.s3.amazonaws.com/{key}",
            "cdn_url": f"https://cdn.justkalm.com/{key}"
        }

    def generate_presigned_upload(
        self,
        key: str,
        content_type: str,
        expires_in: int = 3600,
        max_size_mb: int = 50
    ) -> dict:
        """Generate presigned POST fields for direct browser uploads.

        Args:
            key: Destination object key.
            content_type: MIME type the upload must declare.
            expires_in: URL lifetime in seconds.
            max_size_mb: Upper bound enforced via content-length-range.

        Returns:
            Dict with the POST ``url``, required form ``fields``, and the
            echoed ``expires_in`` / ``max_size_mb`` limits.
        """
        bucket = PROD_BUCKETS["assets"].name

        # Policy conditions: every field sent must also appear here, and
        # the size range is enforced server-side by S3.
        conditions = [
            ["content-length-range", 0, max_size_mb * 1024 * 1024],
            {"Content-Type": content_type},
            {"x-amz-storage-class": "INTELLIGENT_TIERING"}
        ]

        presigned = self.s3.generate_presigned_post(
            bucket,
            key,
            Fields={
                "Content-Type": content_type,
                "x-amz-storage-class": "INTELLIGENT_TIERING"
            },
            Conditions=conditions,
            ExpiresIn=expires_in
        )

        return {
            "url": presigned["url"],
            "fields": presigned["fields"],
            "expires_in": expires_in,
            "max_size_mb": max_size_mb
        }

    def generate_presigned_download(
        self,
        key: str,
        expires_in: int = 3600,
        version_id: Optional[str] = None
    ) -> str:
        """Generate a presigned GET URL for secure downloads.

        Args:
            key: Object key to download.
            expires_in: URL lifetime in seconds.
            version_id: Specific object version (buckets are versioned);
                omit for the latest version.

        Returns:
            A time-limited presigned URL string.
        """
        params = {
            "Bucket": PROD_BUCKETS["assets"].name,
            "Key": key
        }
        if version_id:
            params["VersionId"] = version_id

        return self.s3.generate_presigned_url(
            "get_object",
            Params=params,
            ExpiresIn=expires_in
        )

Global Scale Storage Infrastructure

Built for reliability, optimized for performance, designed for scale.

11 9's Durability · 99.99% Availability · 285+ Edge Locations