# irfanh0926's picture
# Upload 13 files
# 64fab7f verified
#!/usr/bin/env python3
"""
Complete pipeline script for T5 Parallel Model
"""
import os
import sys
import argparse
from src.data_preprocess import main as preprocess_data
from src.train import train_model
def main(argv=None) -> None:
    """Run the T5 Parallel Model pipeline: data preprocessing, then training.

    By default both stages run in order. ``--preprocess-only`` stops after
    preprocessing and ``--train-only`` skips straight to training.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default), argparse reads
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, including when
            ``--preprocess-only`` and ``--train-only`` are given together.
    """
    parser = argparse.ArgumentParser(description="T5 Parallel Model Training Pipeline")

    # The two stage selectors contradict each other. As independent flags,
    # passing both skipped *both* stages and exited successfully without
    # doing any work, so reject the combination up front instead.
    stage_group = parser.add_mutually_exclusive_group()
    stage_group.add_argument("--preprocess-only", action="store_true", help="Only preprocess data")
    stage_group.add_argument("--train-only", action="store_true", help="Only train model")
    parser.add_argument("--sample-size", type=int, default=10000, help="Dataset sample size")
    args = parser.parse_args(argv)

    # Create necessary directories
    for directory in ("data/processed", "checkpoints", "logs"):
        os.makedirs(directory, exist_ok=True)

    if not args.train_only:
        print("=== Starting Data Preprocessing ===")
        # TODO: --sample-size is parsed but not forwarded; preprocess_data()
        # is called without arguments. Pass args.sample_size once
        # preprocess_data is confirmed to accept it.
        preprocess_data()

    if not args.preprocess_only:
        print("=== Starting Model Training ===")
        train_model()
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()