{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Depth-Jitter: Query2Label training and depth-variance utilities\n",
    "\n",
    "Code cells, in order:\n",
    "\n",
    "1. Build the dataset configuration, SeaThru augmentation, data module and model.\n",
    "2. Configure logging and callbacks, then train.\n",
    "3. Plot the depth-variance distribution for FathomNet and print a candidate threshold.\n",
    "4. Write per-image depth variances for FathomNet to JSON.\n",
    "\n",
    "The FathomNet configuration in step 1 points `depth_variance_path` at the JSON file written by step 4; if that file does not exist yet, run step 4 first."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Configuration, data module and model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import argparse\n",
    "import torch\n",
    "import pytorch_lightning as pl\n",
    "from pytorch_lightning.loggers import WandbLogger\n",
    "from pytorch_lightning.callbacks import (\n",
    "    TQDMProgressBar, ModelCheckpoint, EarlyStopping\n",
    ")\n",
    "from q2l_labeller.data.coco_data_module import COCODataModule\n",
    "from q2l_labeller.pl_modules.query2label_train_module import Query2LabelTrainModule\n",
    "from q2l_labeller.data.dataset import SeaThruAugmentation\n",
    "\n",
    "# Set random seed\n",
    "pl.seed_everything(40)\n",
    "torch.backends.cudnn.benchmark = True\n",
    "\n",
    "# Shared by the data module and the model so the two values cannot drift apart.\n",
    "IMG_SIZE = 384\n",
    "BATCH_SIZE = 128\n",
    "\n",
    "# Argument Parser for Dynamic Dataset Selection\n",
    "parser = argparse.ArgumentParser(description=\"Depth-Jitter Training Script\")\n",
    "parser.add_argument(\n",
    "    \"--dataset\", type=str, choices=[\"UTDAC2020\", \"FathomNet\"], default=\"FathomNet\",\n",
    "    help=\"Select dataset: 'UTDAC2020' or 'FathomNet' (default: FathomNet)\"\n",
    ")\n",
    "# parse_known_args() rather than parse_args(): a Jupyter kernel is launched with its own\n",
    "# command-line arguments (e.g. `-f <connection file>`), which parse_args() rejects with\n",
    "# SystemExit. Unrecognised arguments are ignored; `--dataset` still works from a script.\n",
    "args, _unknown_args = parser.parse_known_args()\n",
    "\n",
    "# Dataset configurations\n",
    "datasets = {\n",
    "    \"UTDAC2020\": {\n",
    "        \"image_folder\": \"/home/mundus/mrahman528/thesis/thesis_paper/UTDAC2020\",\n",
    "        \"depth_image_folder\": \"/home/mundus/mrahman528/thesis/thesis_paper/UTDAC2020/depth_train\",\n",
    "        \"depth_npy_folder\": \"/home/mundus/mrahman528/thesis/thesis_paper/UTDAC2020/depth_train\",\n",
    "        \"seathru_parameters_path\": \"/home/mundus/mrahman528/thesis/thesis_paper/parameters_train.json\",\n",
    "        \"depth_variance_path\": \"/home/mundus/mrahman528/Depth-Jitter/depth_variance_utdac.json\",\n",
    "        \"threshold\": 9.49,  # Precomputed threshold\n",
    "        \"num_classes\": 4\n",
    "    },\n",
    "    \"FathomNet\": {\n",
    "        \"image_folder\": \"/home/mundus/mrahman528/projects/mir/depth_jitter/fathomnet_2023_dataset\",\n",
    "        \"depth_image_folder\": \"/home/mundus/mrahman528/projects/mir/depth_jitter/fathomnet_2023_dataset/depth_vis_train\",\n",
    "        \"depth_npy_folder\": \"/home/mundus/mrahman528/projects/mir/depth_jitter/fathomnet_2023_dataset/depth_vis_train\",\n",
    "        \"seathru_parameters_path\": \"/home/mundus/mrahman528/Depth-Jitter/parameters_train.json\",\n",
    "        \"depth_variance_path\": \"/home/mundus/mrahman528/Depth-Jitter/depth_variance_fathomnet.json\",\n",
    "        \"threshold\": 3.66,  # Precomputed threshold\n",
    "        \"num_classes\": 290\n",
    "    }\n",
    "}\n",
    "\n",
    "# Select dataset based on user input\n",
    "selected_dataset = datasets[args.dataset]\n",
    "\n",
    "# Initialize SeaThru Augmentation\n",
    "seathru_transform = SeaThruAugmentation(\n",
    "    selected_dataset[\"image_folder\"],\n",
    "    selected_dataset[\"depth_image_folder\"],\n",
    "    selected_dataset[\"depth_npy_folder\"],\n",
    "    selected_dataset[\"seathru_parameters_path\"],\n",
    "    selected_dataset[\"depth_variance_path\"],\n",
    "    threshold=selected_dataset[\"threshold\"]\n",
    ")\n",
    "\n",
    "# Initialize Data Module\n",
    "coco = COCODataModule(\n",
    "    data_dir=selected_dataset[\"image_folder\"],\n",
    "    img_size=IMG_SIZE,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    num_workers=8,  # Adjust based on CPU cores\n",
    "    use_cutmix=True,\n",
    "    cutmix_alpha=1.0,\n",
    "    train_classes=None,\n",
    "    sampling_strategy=\"oversample\",  # oversample, undersample, default\n",
    "    augmentation_strategy=\"seathru\",\n",
    "    num_classes=selected_dataset[\"num_classes\"],\n",
    "    seathru_transform=seathru_transform\n",
    ")\n",
    "\n",
    "# Model Parameters (Updated n_classes Dynamically)\n",
    "param_dict = {\n",
    "    \"backbone_desc\": \"resnest101e\",\n",
    "    \"conv_out_dim\": 2048,\n",
    "    \"hidden_dim\": 256,\n",
    "    \"num_encoders\": 2,\n",
    "    \"num_decoders\": 3,\n",
    "    \"num_heads\": 8,\n",
    "    \"batch_size\": BATCH_SIZE,\n",
    "    \"image_dim\": IMG_SIZE,\n",
    "    \"learning_rate\": 1e-4,\n",
    "    \"momentum\": 0.9,\n",
    "    \"weight_decay\": 1e-2,\n",
    "    \"n_classes\": selected_dataset[\"num_classes\"],  # Dynamically assign class numbers\n",
    "    \"thresh\": 0.4,\n",
    "    \"use_cutmix\": True,\n",
    "    \"use_pos_encoding\": True,\n",
    "    \"loss\": \"ASL\",\n",
    "    \"data\": coco\n",
    "}\n",
    "\n",
    "# Initialize Model\n",
    "pl_model = Query2LabelTrainModule(**param_dict)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Logging, callbacks and training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# WandB Logger\n",
    "wandb_logger = WandbLogger(\n",
    "    project=\"depth_jitter-last-final\",\n",
    "    save_dir=\"training/logs/depthJitter\",\n",
    "    log_model=True,\n",
    "    # Derived from the selected dataset so runs on different datasets do not share one ID\n",
    "    id=f\"resnest-DJ+all-ASL-{args.dataset}-{IMG_SIZE}\",\n",
    "    sync_tensorboard=True\n",
    ")\n",
    "\n",
    "# Model Checkpoint Callback\n",
    "# val_mAP is a higher-is-better metric, so the best checkpoint is the one that maximises it.\n",
    "checkpoint_callback = ModelCheckpoint(\n",
    "    monitor=\"val_mAP\",\n",
    "    dirpath=f\"training/checkpoints/depth_jitter_{args.dataset}\",\n",
    "    filename=\"best-checkpoint-{epoch:02d}-{val_mAP:.2f}\",\n",
    "    save_top_k=1,\n",
    "    mode=\"max\"\n",
    ")\n",
    "\n",
    "# Early Stopping Callback\n",
    "# mode=\"max\": stop once val_mAP has not increased for `patience` epochs.\n",
    "early_stopping_callback = EarlyStopping(\n",
    "    monitor=\"val_mAP\",\n",
    "    patience=30,  # Number of epochs with no improvement\n",
    "    verbose=True,\n",
    "    mode=\"max\"\n",
    ")\n",
    "\n",
    "# Trainer Configuration\n",
    "# NOTE(review): Lightning may refuse strategy=\"ddp\" inside an interactive kernel -- confirm,\n",
    "# or run this as a script / switch to \"ddp_notebook\".\n",
    "trainer = pl.Trainer(\n",
    "    max_epochs=200,\n",
    "    precision=16,\n",
    "    accelerator=\"gpu\",\n",
    "    devices=\"auto\",\n",
    "    strategy=\"ddp\",\n",
    "    gradient_clip_val=0.1,\n",
    "    logger=wandb_logger,\n",
    "    default_root_dir=f\"training/checkpoints/depth_jitter_{args.dataset}\",\n",
    "    callbacks=[\n",
    "        TQDMProgressBar(refresh_rate=100),\n",
    "        checkpoint_callback,\n",
    "        early_stopping_callback\n",
    "    ],\n",
    "    accumulate_grad_batches=4,\n",
    "    detect_anomaly=True,\n",
    "    profiler=\"simple\"\n",
    ")\n",
    "\n",
    "# Start Training (the data module is passed by keyword so its role is explicit)\n",
    "trainer.fit(pl_model, datamodule=param_dict[\"data\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Depth-variance distribution and candidate threshold (FathomNet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Load Sea-thru parameters\n",
    "params_path = \"/home/mundus/mrahman528/Depth-Jitter/parameters_train.json\"\n",
    "depth_dir = \"/home/mundus/mrahman528/projects/mir/depth_jitter/fathomnet_2023_dataset/depth_vis_train\"\n",
    "\n",
    "# A channel whose betac is at or below this value is treated as having hit the lower bound.\n",
    "BETAC_LOWER_BOUND = 1e-6\n",
    "\n",
    "with open(params_path, \"r\") as f:\n",
    "    seathru_params = json.load(f)\n",
    "\n",
    "# One record per image whose depth map is present on disk.\n",
    "variance_records = []\n",
    "\n",
    "# Iterate through image parameters\n",
    "for image_name, params in seathru_params.items():\n",
    "    # splitext drops only the final extension, so names containing extra dots stay intact.\n",
    "    image_stem = os.path.splitext(image_name)[0]\n",
    "    depth_map_path = os.path.join(depth_dir, f\"{image_stem}_raw_depth_meter.npy\")\n",
    "\n",
    "    # Ensure depth map exists before loading\n",
    "    if not os.path.exists(depth_map_path):\n",
    "        print(f\"Warning: Missing depth file {depth_map_path}\")\n",
    "        continue\n",
    "\n",
    "    # Load depth map and compute variance\n",
    "    depth_data = np.load(depth_map_path)\n",
    "    variance = float(np.var(depth_data))\n",
    "\n",
    "    # Check if betac converged to lower bound for any channel\n",
    "    converged_to_bound = any(\n",
    "        params[f\"channel_{ch}\"][\"betac\"] <= BETAC_LOWER_BOUND for ch in range(3)\n",
    "    )\n",
    "\n",
    "    # Append results\n",
    "    variance_records.append({\n",
    "        \"image\": image_name,\n",
    "        \"variance\": variance,\n",
    "        \"converged_to_bound\": converged_to_bound\n",
    "    })\n",
    "\n",
    "# Fail with a clear message instead of a KeyError / ZeroDivisionError further down.\n",
    "if not variance_records:\n",
    "    raise FileNotFoundError(\n",
    "        f\"None of the images listed in {params_path} has a depth map in {depth_dir}\"\n",
    "    )\n",
    "\n",
    "# Convert to DataFrame\n",
    "df = pd.DataFrame(variance_records)\n",
    "\n",
    "# Split into two cases\n",
    "case_a = df[df[\"converged_to_bound\"]]\n",
    "case_b = df[~df[\"converged_to_bound\"]]\n",
    "\n",
    "# Visualization\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "sns.histplot(case_a[\"variance\"], kde=True, bins=50, color=\"red\", label=\"Bound Convergence\", alpha=0.6, line_kws={'linewidth': 2}, ax=ax)\n",
    "sns.histplot(case_b[\"variance\"], kde=True, bins=50, color=\"blue\", label=\"No Bound Convergence\", alpha=0.6, line_kws={'linewidth': 2}, ax=ax)\n",
    "ax.set_xlabel(\"Depth Variance\", fontsize=12)\n",
    "ax.set_ylabel(\"Frequency\", fontsize=12)\n",
    "ax.set_title(\"Depth Variance Distribution by Convergence-Fathomnet2023\", fontsize=14)\n",
    "ax.legend()\n",
    "ax.grid(True, linestyle=\"--\", alpha=0.5)\n",
    "plt.show()\n",
    "\n",
    "# Threshold Experimentation\n",
    "threshold = df[\"variance\"].quantile(0.25)  # Example: Lower quartile\n",
    "excluded_images = df[df[\"variance\"] < threshold]\n",
    "percentage_excluded = len(excluded_images) / len(df) * 100\n",
    "\n",
    "print(f\"Threshold for Depth Variance: {threshold:.2f}\")\n",
    "print(f\"Percentage of images below threshold: {percentage_excluded:.2f}%\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Export per-image depth variances (FathomNet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import numpy as np\n",
    "from pathlib import Path\n",
    "\n",
    "# Set your depth folder paths\n",
    "depth_npy_folder = \"/home/mundus/mrahman528/projects/mir/depth_jitter/fathomnet_2023_dataset/depth_vis_train\"\n",
    "\n",
    "# Dictionary to store depth variances (image file name -> variance)\n",
    "depth_variances = {}\n",
    "\n",
    "# Iterate through depth `.npy` files; sorted() makes the key order of the written JSON\n",
    "# independent of the order in which the filesystem lists the directory.\n",
    "for npy_file in sorted(Path(depth_npy_folder).glob(\"*.npy\")):\n",
    "    image_name = npy_file.stem.replace(\"_raw_depth_meter\", \"\") + \".jpg\"  # Match original image names\n",
    "\n",
    "    # Load depth map\n",
    "    depth_data = np.load(npy_file)\n",
    "\n",
    "    # Compute variance\n",
    "    variance = np.var(depth_data)\n",
    "\n",
    "    # Convert the numpy scalar to a Python float so json.dump can serialise it\n",
    "    depth_variances[image_name] = float(variance)\n",
    "\n",
    "# Save the results to a JSON file\n",
    "output_json_path = \"/home/mundus/mrahman528/Depth-Jitter/depth_variance_fathomnet.json\"\n",
    "with open(output_json_path, \"w\") as f:\n",
    "    json.dump(depth_variances, f, indent=4)\n",
    "\n",
    "print(f\"Saved depth variances for {len(depth_variances)} images to {output_json_path}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "depth-jitter",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}