deep-learning

PyTorch, TensorFlow, neural networks, CNNs, transformers, and deep learning for production

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy the command below and send it to your AI assistant to install this skill:

Install skill "deep-learning" with this command: npx skills add pluginagentmarketplace/custom-plugin-data-engineer/pluginagentmarketplace-custom-plugin-data-engineer-deep-learning

Deep Learning

Production-grade deep learning with PyTorch, neural network architectures, and modern training practices.

Quick Start

# PyTorch Production Training Loop
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import wandb  # optional: experiment tracking (not exercised in this minimal loop)

class TransformerClassifier(nn.Module):
    def __init__(self, vocab_size: int, d_model: int = 256, n_heads: int = 8, n_classes: int = 2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoding = nn.Parameter(torch.randn(1, 512, d_model))  # learned positions, max sequence length 512
        encoder_layer = nn.TransformerEncoderLayer(d_model, n_heads, dim_feedforward=1024, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=6)
        self.classifier = nn.Linear(d_model, n_classes)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x, mask=None):
        x = self.embedding(x) + self.pos_encoding[:, :x.size(1), :]
        x = self.dropout(x)
        x = self.transformer(x, src_key_padding_mask=mask)
        x = x.mean(dim=1)  # Global average pooling
        return self.classifier(x)

# Training configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerClassifier(vocab_size=30000).to(device)
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
scheduler = CosineAnnealingLR(optimizer, T_max=10)
criterion = nn.CrossEntropyLoss()

# Training loop with mixed precision
scaler = torch.cuda.amp.GradScaler()

# train_loader (defined elsewhere) is assumed to yield dict batches with "input_ids" and "labels"
for epoch in range(10):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            logits = model(batch["input_ids"].to(device))
            loss = criterion(logits, batch["labels"].to(device))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    scheduler.step()
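
After each epoch you will typically want a validation pass. A minimal sketch, assuming a val_loader (not part of the snippet above) that yields the same {"input_ids", "labels"} batch format:

# Validation pass (sketch; val_loader is an assumed DataLoader mirroring train_loader's format)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch in val_loader:
        with torch.cuda.amp.autocast():
            logits = model(batch["input_ids"].to(device))
        labels = batch["labels"].to(device)
        correct += (logits.argmax(dim=-1) == labels).sum().item()
        total += labels.size(0)
print(f"val accuracy: {correct / total:.4f}")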

Core Concepts

1. Modern Neural Network Architectures

import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    """Residual block with skip connection."""
    def __init__(self, channels: int):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        residual = x
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.bn2(self.conv2(x))
        return F.relu(x + residual)

class AttentionBlock(nn.Module):
    """Multi-head self-attention."""
    def __init__(self, d_model: int, n_heads: int = 8):
        super().__init__()
        self.attention = nn.MultiheadAttention(d_model, n_heads, batch_first=True)
        self.norm = nn.LayerNorm(d_model)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_model * 4),
            nn.GELU(),
            nn.Linear(d_model * 4, d_model)
        )
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x, mask=None):
        attn_out, _ = self.attention(x, x, x, attn_mask=mask)
        x = self.norm(x + attn_out)
        return self.norm2(x + self.ffn(x))
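
A quick shape check is a cheap way to validate these blocks before wiring them into a full model. A minimal sketch (the tensor sizes here are arbitrary):

# Smoke test: both blocks should preserve their input shapes
res_block = ResidualBlock(channels=64)
img = torch.randn(8, 64, 32, 32)   # (batch, channels, height, width)
assert res_block(img).shape == img.shape

attn_block = AttentionBlock(d_model=256, n_heads=8)
seq = torch.randn(8, 128, 256)     # (batch, seq_len, d_model)
assert attn_block(seq).shape == seq.shape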

2. Training Best Practices

import torch
from torch.cuda.amp import GradScaler, autocast

# Gradient clipping and accumulation
scaler = GradScaler()

def train_epoch(model, loader, optimizer, accumulation_steps=4):
    model.train()
    optimizer.zero_grad()

    for i, batch in enumerate(loader):
        with autocast():
            # Assumes the model returns a scalar loss for the batch
            loss = model(batch) / accumulation_steps

        scaler.scale(loss).backward()

        if (i + 1) % accumulation_steps == 0:
            scaler.unscale_(optimizer)  # unscale before clipping so the norm is measured in real units
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

# Early stopping
class EarlyStopping:
    def __init__(self, patience: int = 5, min_delta: float = 0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')

    def __call__(self, val_loss: float) -> bool:
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience
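
A sketch of how EarlyStopping slots into a training loop. Here evaluate(model, val_loader) is a hypothetical helper assumed to return the mean validation loss:

# Using EarlyStopping around a hypothetical evaluate() helper
early_stopping = EarlyStopping(patience=5)
for epoch in range(100):
    train_epoch(model, train_loader, optimizer)
    val_loss = evaluate(model, val_loader)  # assumed helper: returns mean validation loss
    if early_stopping(val_loss):
        print(f"stopping early at epoch {epoch}")
        break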

# Learning rate finder
def find_lr(model, loader, optimizer, start_lr=1e-7, end_lr=10, num_iter=100):
    lrs, losses = [], []
    lr_mult = (end_lr / start_lr) ** (1 / num_iter)

    for i, batch in enumerate(loader):
        if i >= num_iter:
            break

        lr = start_lr * (lr_mult ** i)
        for pg in optimizer.param_groups:
            pg['lr'] = lr

        loss = train_step(model, batch, optimizer)  # assumed helper: one forward/backward/step, returns the loss
        lrs.append(lr)
        losses.append(loss)

    return lrs, losses
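
The result is usually read off a log-scale plot: pick a learning rate roughly an order of magnitude below the point where the loss starts climbing. A minimal plotting sketch, assuming matplotlib is available:

# Plot the LR sweep on a log axis and eyeball the steepest-descent region
import matplotlib.pyplot as plt

lrs, losses = find_lr(model, train_loader, optimizer)
plt.plot(lrs, losses)
plt.xscale("log")
plt.xlabel("learning rate")
plt.ylabel("loss")
plt.show()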

3. Model Deployment

import torch.onnx
import onnxruntime as ort

# Export to ONNX
def export_to_onnx(model, sample_input, path="model.onnx"):
    model.eval()
    torch.onnx.export(
        model,
        sample_input,
        path,
        export_params=True,
        opset_version=17,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )

# ONNX Runtime inference
class ONNXPredictor:
    def __init__(self, model_path: str):
        self.session = ort.InferenceSession(model_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

    def predict(self, input_data):
        return self.session.run(None, {'input': input_data})[0]

# TorchScript for production
scripted_model = torch.jit.script(model)
scripted_model.save("model_scripted.pt")
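
Before shipping an exported model, it is worth confirming that ONNX Runtime reproduces the PyTorch outputs. A sketch under the assumption that sample_input is a single tensor and the export above has already run:

# Parity check: compare PyTorch and ONNX Runtime outputs on the same input
import numpy as np

model.eval()
with torch.no_grad():
    torch_out = model(sample_input).cpu().numpy()

predictor = ONNXPredictor("model.onnx")
onnx_out = predictor.predict(sample_input.cpu().numpy())
np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-3, atol=1e-5)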

Tools & Technologies

| Tool | Purpose | Version (2025) |
|------|---------|----------------|
| PyTorch | Deep learning framework | 2.2+ |
| PyTorch Lightning | Training framework | 2.2+ |
| Hugging Face | Transformers, datasets | 4.38+ |
| ONNX Runtime | Model inference | 1.17+ |
| TensorRT | GPU optimization | 8.6+ |
| Weights & Biases | Experiment tracking | Latest |
| Ray | Distributed training | 2.9+ |

Troubleshooting Guide

| Issue | Symptoms | Root Cause | Fix |
|-------|----------|------------|-----|
| Vanishing gradient | Loss not decreasing | Deep network, wrong activation | Use ReLU/GELU, residual connections |
| Exploding gradient | NaN loss | Learning rate too high | Gradient clipping, lower LR |
| Overfitting | Train >> val accuracy | Model too complex | Dropout, regularization, data augmentation |
| OOM error | CUDA out of memory | Batch too large | Reduce batch size, gradient accumulation |
| Slow training | Low GPU utilization | Data loading bottleneck | More workers, prefetch (see the sketch below) |
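
For the data-loading bottleneck in the last row, the usual first move is tuning the DataLoader itself. A sketch; train_dataset is an assumed Dataset instance, and the worker counts are machine-dependent starting points rather than prescriptions:

# DataLoader tuning for GPU-bound training (values are starting points to profile)
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_dataset,            # assumed Dataset instance
    batch_size=64,
    shuffle=True,
    num_workers=4,            # parallel CPU workers for decoding/augmentation
    pin_memory=True,          # faster host-to-GPU copies
    prefetch_factor=2,        # batches pre-fetched per worker
    persistent_workers=True,  # avoid re-forking workers every epoch
)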

Debug Commands

# Check GPU memory
print(torch.cuda.memory_summary())

# Profile training
with torch.profiler.profile(
    activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
) as prof:
    train_step(model, batch, optimizer)
print(prof.key_averages().table(sort_by="cuda_time_total"))

# Gradient flow check
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f"{name}: grad_mean={param.grad.mean():.6f}")

Best Practices

# ✅ DO: Use mixed precision training
with torch.cuda.amp.autocast():
    output = model(inputs)  # `inputs` avoids shadowing Python's built-in input()

# ✅ DO: Initialize weights properly
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)

# ✅ DO: Use gradient checkpointing for large models
from torch.utils.checkpoint import checkpoint
x = checkpoint(self.layer, x, use_reentrant=False)  # inside a module's forward; recomputes activations to save memory

# ✅ DO: Freeze base model for fine-tuning
for param in model.base.parameters():
    param.requires_grad = False

# ❌ DON'T: Use dropout during inference
model.eval()  # eval() disables dropout and switches norm layers to inference statistics

# ❌ DON'T: Forget to move data to device
batch = {k: v.to(device) for k, v in batch.items()}  # inputs must live on the same device as the model

Skill Certification Checklist:

  • Can build and train neural networks in PyTorch
  • Can implement attention mechanisms and transformers
  • Can use mixed precision and gradient accumulation
  • Can export models to ONNX/TorchScript
  • Can debug training issues (gradients, memory)

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

All entries below come from the repository source index, are flagged Needs Review, and have no summary provided by the upstream source.

  • data-engineering (Automation)
  • machine learning (Coding)
  • python-programming (Coding)
  • statistics-math (Automation)