Storage Infrastructure
Storage Architecture
Enterprise-grade object storage, CDN distribution, and file processing pipelines handling 50TB+ of fashion assets with sub-100ms global delivery.
52TB
Total Storage
285+
CDN Edge Locations
<95ms
P95 Latency
1.2M
Daily Uploads
S3 Architecture
Multi-region S3 deployment with intelligent tiering and cross-region replication for durability and low-latency access.
Bucket Structure
justkalm-assets-prod/
├── products/
│   ├── images/
│   │   ├── original/      # Full-res uploads
│   │   ├── processed/     # Optimized versions
│   │   └── thumbnails/    # CDN-ready thumbs
│   └── documents/
│       ├── certificates/  # Authenticity docs
│       └── reports/       # Analysis PDFs
├── brands/
│   ├── logos/
│   └── media/
└── user-content/
    ├── uploads/
    └── exports/

S3 Configuration
# storage/config.py
from dataclasses import dataclass
from enum import Enum
class StorageClass(Enum):
    """S3 storage classes this module can request for an object.

    Each member's value is the exact string handed to S3 (see the
    ``StorageClass`` upload argument built in ``storage/service.py``).
    """

    STANDARD = "STANDARD"
    INTELLIGENT_TIERING = "INTELLIGENT_TIERING"
    GLACIER_IR = "GLACIER_IR"  # Instant Retrieval
    DEEP_ARCHIVE = "DEEP_ARCHIVE"
@dataclass
class BucketConfig:
    """Declarative settings for a single S3 bucket.

    The fields are plain data. ``replication_rules`` derives a cross-region
    replication document from ``name`` and ``account_id``; nothing in this
    class acts on ``versioning``, ``encryption`` or ``replication``.
    """

    name: str  # bucket name; "<name>-replica" is used as the replica bucket
    region: str
    versioning: bool = True
    encryption: str = "AES256"
    replication: bool = True
    # Account that owns the "s3-replication" IAM role. The default keeps the
    # historical "ACCOUNT" placeholder so existing callers see the same ARN;
    # pass a real account id to get a usable role ARN.
    account_id: str = "ACCOUNT"

    @property
    def replication_rules(self) -> dict:
        """Return the cross-region replication document for this bucket.

        The dict has a ``Role`` ARN and a single enabled rule that replicates
        every key (empty prefix filter) to ``<name>-replica`` in the
        ``STANDARD_IA`` storage class, with replication-time control and
        metrics both set to 15 minutes and delete-marker replication
        disabled.

        NOTE: the result does not depend on ``self.replication``; callers
        that want to skip non-replicated buckets must check that flag
        themselves.
        """
        destination = {
            "Bucket": f"arn:aws:s3:::{self.name}-replica",
            "ReplicationTime": {"Status": "Enabled", "Time": {"Minutes": 15}},
            "Metrics": {"Status": "Enabled", "EventThreshold": {"Minutes": 15}},
            "StorageClass": "STANDARD_IA",
        }
        rule = {
            "ID": "CrossRegionReplication",
            "Status": "Enabled",
            "Priority": 1,
            "DeleteMarkerReplication": {"Status": "Disabled"},
            "Filter": {"Prefix": ""},
            "Destination": destination,
        }
        return {
            "Role": f"arn:aws:iam::{self.account_id}:role/s3-replication",
            "Rules": [rule],
        }
# Production buckets keyed by logical role. Both entries use versioning and
# KMS encryption; only "assets" is flagged for cross-region replication.
PROD_BUCKETS = {
    "assets": BucketConfig(
        name="justkalm-assets-prod",
        region="us-east-1",
        versioning=True,
        encryption="aws:kms",
        replication=True,
    ),
    "backups": BucketConfig(
        name="justkalm-backups-prod",
        region="us-west-2",
        versioning=True,
        encryption="aws:kms",
        replication=False,
    ),
}

# Storage Service
# storage/service.py
import asyncio
import hashlib
from datetime import datetime, timezone
from functools import partial
from typing import BinaryIO, Optional
from urllib.parse import quote

import boto3
from boto3.s3.transfer import TransferConfig
from botocore.config import Config

from .config import PROD_BUCKETS, StorageClass
class StorageService:
    """
    Enterprise storage service with presigned URLs,
    multipart uploads, and intelligent tiering.

    Every method targets the bucket named by ``PROD_BUCKETS["assets"]``.
    """

    # Read size used while hashing, so an upload is never held in memory
    # in one piece.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self):
        """Create the S3 client and the shared multipart transfer settings."""
        self.s3 = boto3.client(
            's3',
            config=Config(
                signature_version='s3v4',
                retries={'max_attempts': 3, 'mode': 'adaptive'},
            ),
        )
        # TransferConfig is imported explicitly: reaching it through
        # ``boto3.s3.transfer`` only works if something else has already
        # imported that submodule.
        self.transfer_config = TransferConfig(
            multipart_threshold=8 * 1024 * 1024,  # 8MB
            max_concurrency=10,
            multipart_chunksize=8 * 1024 * 1024,
            use_threads=True,
        )

    def _sha256_and_rewind(self, file: BinaryIO) -> str:
        """Hash ``file`` from its current position, then seek back to 0."""
        digest = hashlib.sha256()
        while True:
            chunk = file.read(self._HASH_CHUNK_SIZE)
            if not chunk:
                break
            digest.update(chunk)
        file.seek(0)
        return digest.hexdigest()

    async def upload_file(
        self,
        file: BinaryIO,
        key: str,
        content_type: str,
        storage_class: StorageClass = StorageClass.INTELLIGENT_TIERING,
        metadata: Optional[dict] = None,
    ) -> dict:
        """Upload file with content-based deduplication.

        The stream is hashed with SHA-256 and rewound to offset 0. If
        ``_find_by_hash`` returns a key for that digest, nothing is uploaded.
        Hashing and the S3 transfer both run in the default executor so the
        event loop is not blocked while they do file and network I/O.

        Args:
            file: Binary stream to upload; must support ``read`` and ``seek``.
            key: Destination object key in the assets bucket.
            content_type: Value stored as the object's ``ContentType``.
            storage_class: Storage class requested for the new object.
            metadata: Extra user metadata. It is merged last, so it can
                override the generated ``content-hash`` / ``uploaded-at``.

        Returns:
            ``{"key": <existing key>, "deduplicated": True}`` when the content
            is already stored; otherwise a dict with ``key``, ``bucket``,
            ``hash``, ``url`` and ``cdn_url``.
        """
        loop = asyncio.get_running_loop()

        # Calculate content hash for deduplication
        content_hash = await loop.run_in_executor(
            None, self._sha256_and_rewind, file
        )

        # Check if content already exists
        # NOTE(review): ``_find_by_hash`` is not defined in this snippet; it
        # has to be provided elsewhere on the class.
        existing = await self._find_by_hash(content_hash)
        if existing:
            return {"key": existing, "deduplicated": True}

        # Prepare upload parameters
        extra_args = {
            "ContentType": content_type,
            "StorageClass": storage_class.value,
            "ServerSideEncryption": "aws:kms",
            "Metadata": {
                "content-hash": content_hash,
                "uploaded-at": datetime.now(timezone.utc).isoformat(),
                **(metadata or {}),
            },
        }

        # Upload with automatic multipart
        bucket = PROD_BUCKETS["assets"].name
        await loop.run_in_executor(
            None,
            partial(
                self.s3.upload_fileobj,
                file,
                bucket,
                key,
                Config=self.transfer_config,
                ExtraArgs=extra_args,
            ),
        )

        # Percent-encode the key so spaces and reserved characters still
        # produce a valid URL; "/" is kept as the path separator.
        url_path = quote(key, safe="/")
        return {
            "key": key,
            "bucket": bucket,
            "hash": content_hash,
            "url": f"https://{bucket}.s3.amazonaws.com/{url_path}",
            "cdn_url": f"https://cdn.justkalm.com/{url_path}",
        }

    def generate_presigned_upload(
        self,
        key: str,
        content_type: str,
        expires_in: int = 3600,
        max_size_mb: int = 50,
    ) -> dict:
        """Generate presigned URL for direct browser uploads.

        The POST policy limits the body to ``max_size_mb`` MiB and pins the
        ``Content-Type`` and the INTELLIGENT_TIERING storage class.

        Args:
            key: Object key the browser will upload to.
            content_type: The only ``Content-Type`` the policy accepts.
            expires_in: Passed to boto3 as ``ExpiresIn``.
            max_size_mb: Upper bound of the ``content-length-range`` condition.

        Returns:
            Dict with the POST ``url``, the form ``fields`` to submit with it,
            and the ``expires_in`` / ``max_size_mb`` values that were applied.
        """
        bucket = PROD_BUCKETS["assets"].name
        fields = {
            "Content-Type": content_type,
            "x-amz-storage-class": StorageClass.INTELLIGENT_TIERING.value,
        }
        # Each pre-filled form field is repeated as a policy condition,
        # alongside the size limit.
        conditions = [
            ["content-length-range", 0, max_size_mb * 1024 * 1024],
            {"Content-Type": fields["Content-Type"]},
            {"x-amz-storage-class": fields["x-amz-storage-class"]},
        ]
        presigned = self.s3.generate_presigned_post(
            bucket,
            key,
            Fields=fields,
            Conditions=conditions,
            ExpiresIn=expires_in,
        )
        return {
            "url": presigned["url"],
            "fields": presigned["fields"],
            "expires_in": expires_in,
            "max_size_mb": max_size_mb,
        }

    def generate_presigned_download(
        self,
        key: str,
        expires_in: int = 3600,
        version_id: Optional[str] = None,
    ) -> str:
        """Generate presigned URL for secure downloads.

        Args:
            key: Object key in the assets bucket.
            expires_in: Passed to boto3 as ``ExpiresIn``.
            version_id: When truthy, the URL is pinned to that object version.

        Returns:
            The presigned ``get_object`` URL.
        """
        params = {
            "Bucket": PROD_BUCKETS["assets"].name,
            "Key": key,
        }
        if version_id:
            params["VersionId"] = version_id
        return self.s3.generate_presigned_url(
            "get_object",
            Params=params,
            ExpiresIn=expires_in,
        )

# Global Scale Storage Infrastructure
Built for reliability, optimized for performance, designed for scale.
11 9's Durability
99.99% Availability
285+ Edge Locations