Upload folder using huggingface_hub
Files changed:
- .DS_Store +0 -0
- checkpoints/complete_diffusion_model.pth +3 -0
- checkpoints/diffusion_model_final.pth +3 -0
- checkpoints/inference_example.py +29 -0
- checkpoints/model_info.json +36 -0
- cifar10-diffusion-model.zip +3 -0
- implementation.ipynb +0 -0
- readme.md +92 -0
.DS_Store ADDED
Binary file (6.15 kB)
checkpoints/complete_diffusion_model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4266de3549124b61530bf87d88d108d0dca3602161f47a9a0979af9fd0d76c71
+size 67281530
checkpoints/diffusion_model_final.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3766a051e2774e04e0d07f33b86faf4e14581077660e8882851a3016c23f2c8
+size 201861354
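Both checkpoints are stored as Git LFS pointers: the pointer records the SHA-256 digest (`oid`) and byte size of the real file, which is fetched separately. A minimal sketch for verifying a downloaded checkpoint against the pointer's digest, using only the standard library (the local file path is an assumption about where you saved it):

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file in 1 MB chunks so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected digest taken from the LFS pointer above
expected = "e3766a051e2774e04e0d07f33b86faf4e14581077660e8882851a3016c23f2c8"
assert sha256_of("checkpoints/diffusion_model_final.pth") == expected
```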
checkpoints/inference_example.py ADDED
@@ -0,0 +1,29 @@
+
+# Inference script for the trained diffusion model
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+import math
+
+# [Copy all the model architecture classes here - TimeEmbedding, ResidualBlock, etc.]
+
+def load_model(checkpoint_path, device='cuda'):
+    """Load the trained diffusion model"""
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+
+    # Initialize model with saved config
+    model = SimpleUNet(**checkpoint['model_config'])
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.to(device)
+    model.eval()
+
+    # Initialize scheduler
+    scheduler = DDPMScheduler(**checkpoint['diffusion_config'], device=device)
+
+    return model, scheduler, checkpoint['model_info']
+
+# Usage example:
+# model, scheduler, info = load_model('complete_diffusion_model.pth')
+# generated_images = generate_images(model, scheduler, num_images=4)
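`generate_images` is referenced here and in the README but not defined in this file; it lives with the architecture classes in the notebook. A minimal sketch of what a DDPM ancestral sampler of that shape typically looks like — the model signature `model(x, t)` and the scheduler attributes `num_timesteps`, `betas`, and `alphas_cumprod` are assumptions, not confirmed by this repo:

```python
import torch
from tqdm import tqdm

@torch.no_grad()
def generate_images(model, scheduler, num_images=4, image_size=32, device='cuda'):
    """Hypothetical DDPM ancestral sampler: start from pure noise, denoise step by step."""
    x = torch.randn(num_images, 3, image_size, image_size, device=device)
    for t in tqdm(reversed(range(scheduler.num_timesteps)), total=scheduler.num_timesteps):
        t_batch = torch.full((num_images,), t, device=device, dtype=torch.long)
        eps = model(x, t_batch)  # predicted noise (assumed model output)
        alpha = 1.0 - scheduler.betas[t]
        alpha_bar = scheduler.alphas_cumprod[t]
        # Posterior mean of x_{t-1} given the noise prediction (Ho et al., Eq. 11)
        x = (x - (1 - alpha) / (1 - alpha_bar).sqrt() * eps) / alpha.sqrt()
        if t > 0:  # add noise at every step except the last
            x = x + scheduler.betas[t].sqrt() * torch.randn_like(x)
    return x.clamp(-1, 1)
```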
checkpoints/model_info.json ADDED
@@ -0,0 +1,36 @@
+{
+  "model_name": "CIFAR-10 Diffusion Model",
+  "architecture": "SimpleUNet",
+  "dataset": "CIFAR-10",
+  "training_details": {
+    "epochs": 20,
+    "batch_size": 128,
+    "learning_rate": 0.0001,
+    "optimizer": "AdamW",
+    "scheduler": "CosineAnnealingLR",
+    "parameters": 16808835,
+    "training_time_minutes": 14.54,
+    "final_loss": 0.0363,
+    "best_loss": 0.0358
+  },
+  "model_config": {
+    "in_channels": 3,
+    "out_channels": 3,
+    "time_emb_dim": 128,
+    "image_size": 32
+  },
+  "diffusion_config": {
+    "num_timesteps": 1000,
+    "beta_start": 0.0001,
+    "beta_end": 0.02,
+    "schedule": "linear"
+  },
+  "hardware": {
+    "gpu": "NVIDIA GeForce RTX 3060",
+    "vram_used": "0.43 GB",
+    "total_vram": "11.66 GB"
+  },
+  "created_date": "2025-07-19T17:59:48.665409",
+  "framework": "PyTorch",
+  "python_version": "3.12"
+}
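The `diffusion_config` above specifies a linear beta schedule over 1000 timesteps. A minimal sketch of the quantities a `DDPMScheduler` built from this config would precompute — the actual class is defined in the notebook, so this structure is an assumption:

```python
import torch

class DDPMScheduler:
    """Hypothetical reconstruction of the scheduler implied by diffusion_config."""
    def __init__(self, num_timesteps=1000, beta_start=1e-4, beta_end=0.02,
                 schedule='linear', device='cpu'):
        assert schedule == 'linear', "only the linear schedule is sketched here"
        self.num_timesteps = num_timesteps
        # Linearly spaced noise variances beta_1 .. beta_T
        self.betas = torch.linspace(beta_start, beta_end, num_timesteps, device=device)
        self.alphas = 1.0 - self.betas
        # Cumulative products alpha_bar_t, used for the closed-form noising q(x_t | x_0)
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
```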
cifar10-diffusion-model.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:514be590d30a8d5a8207e3f18cb7fd46d4aebbc8fb2130df30645e20dff9b412
+size 246459190
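Since the folder was uploaded with `huggingface_hub`, individual files can be fetched the same way. A short sketch, assuming a repo id of the form `<user>/cifar10-diffusion` — the actual repository id is not shown on this page:

```python
from huggingface_hub import hf_hub_download

# repo_id below is a placeholder; substitute the real repository.
path = hf_hub_download(
    repo_id="your-username/cifar10-diffusion",
    filename="checkpoints/complete_diffusion_model.pth",
)
print(path)  # local cache path of the downloaded checkpoint
```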
implementation.ipynb ADDED
The diff for this file is too large to render. See the raw diff.
readme.md ADDED
@@ -0,0 +1,92 @@
+# CIFAR-10 Diffusion Model
+
+🎨 **A diffusion model trained from scratch on the CIFAR-10 dataset**
+
+## Model Details
+- **Architecture**: SimpleUNet with 16.8M parameters
+- **Dataset**: CIFAR-10 (50,000 training images)
+- **Training Time**: 14.54 minutes on an RTX 3060
+- **Final Loss**: 0.0363
+- **Image Size**: 32x32 RGB
+- **Framework**: PyTorch
+
+## Quick Start
+
+```python
+import torch
+from model import SimpleUNet, DDPMScheduler, generate_images
+
+# Load the trained model
+checkpoint = torch.load('complete_diffusion_model.pth')
+model = SimpleUNet(**checkpoint['model_config'])
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+# Initialize scheduler
+scheduler = DDPMScheduler(**checkpoint['diffusion_config'])
+
+# Generate images
+generated_images = generate_images(model, scheduler, num_images=8)
+```
+
+## Installation
+
+```bash
+pip install "torch>=2.0.0" "torchvision>=0.15.0" matplotlib tqdm pillow numpy
+```
+
+## Files Included
+- `complete_diffusion_model.pth` - Complete model with config (64MB)
+- `model_info.json` - Training details and metadata
+- `diffusion_model_final.pth` - Final training checkpoint (193MB)
+- `inference_example.py` - Ready-to-use inference script
+
+## Training Details
+- **Epochs**: 20
+- **Batch Size**: 128
+- **Learning Rate**: 1e-4 (CosineAnnealingLR)
+- **Optimizer**: AdamW
+- **GPU**: NVIDIA RTX 3060 (0.43GB VRAM used)
+- **Loss Reduction**: 73% (from 0.1349 to 0.0363)
+
+## Hardware Requirements
+- **Minimum**: 1GB VRAM for inference
+- **Recommended**: 2GB+ VRAM for training extensions
+- **CPU**: Works, but slower
+
+## Results
+The model generates colorful abstract patterns that capture CIFAR-10's color distributions.
+With more training epochs (50-100), it should produce more recognizable objects.
+
+## Improvements
+To get better results:
+1. **Train longer**: 50-100 epochs instead of 20
+2. **Larger model**: Increase channels/layers
+3. **Advanced sampling**: DDIM, DPM-Solver
+4. **Richer datasets**: CelebA, ImageNet
+5. **Learning rate**: Experiment with schedules
+
+## Model Architecture
+- **U-Net based** with ResNet blocks
+- **Time embedding** for diffusion timesteps
+- **Attention layers** at multiple resolutions
+- **Skip connections** for better gradient flow
+
+## Citation
+```bibtex
+@misc{cifar10-diffusion-2025,
+  title={CIFAR-10 Diffusion Model},
+  author={Your Name},
+  year={2025},
+  url={https://github.com/your-username/cifar10-diffusion}
+}
+```
+
+## License
+MIT License - Feel free to use and modify!
+
+---
+**Created**: July 19, 2025
+**Training Time**: 14.54 minutes
+**GPU**: NVIDIA RTX 3060
+**Framework**: PyTorch
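The README's training details (AdamW at 1e-4, CosineAnnealingLR, 20 epochs, a diffusion noise-prediction objective) suggest a standard DDPM training step. A minimal sketch, assuming the model predicts the added noise and is trained with MSE — nothing here is confirmed beyond the hyperparameters listed above:

```python
import torch
import torch.nn.functional as F

def train_step(model, scheduler, optimizer, x0, device='cuda'):
    """One hypothetical DDPM step: noise a clean batch, predict the noise, MSE loss."""
    b = x0.size(0)
    t = torch.randint(0, scheduler.num_timesteps, (b,), device=device)
    noise = torch.randn_like(x0)
    alpha_bar = scheduler.alphas_cumprod[t].view(b, 1, 1, 1)
    # Closed-form forward process: x_t = sqrt(a_bar) * x0 + sqrt(1 - a_bar) * noise
    xt = alpha_bar.sqrt() * x0 + (1 - alpha_bar).sqrt() * noise
    loss = F.mse_loss(model(xt, t), noise)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

# Hypothetical setup matching the listed hyperparameters:
# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
# lr_sched = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
```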