asmo_vhead/multi_gpu_temporal_train.sh

#!/usr/bin/env bash
# Simple multi-GPU training script for SwiftFormerTemporal
# Usage: ./multi_gpu_temporal_train.sh [NUM_GPUS] [OPTIONS]
set -euo pipefail

NUM_GPUS=${1:-2}
# Drop the NUM_GPUS argument (if present) so "$@" holds only pass-through options.
if [ "$#" -gt 0 ]; then shift; fi
echo "Starting multi-GPU training with $NUM_GPUS GPUs"
# Set environment variables for distributed training
export MASTER_PORT=12345
export MASTER_ADDR=localhost
export WORLD_SIZE=$NUM_GPUS
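# Note: torchrun itself sets RANK, LOCAL_RANK, and WORLD_SIZE in each worker's
# environment; the exports above just keep the same values visible to this shell.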
# Launch training
torchrun --nproc_per_node="$NUM_GPUS" --master_port="$MASTER_PORT" main_temporal.py \
--data-path "./videos" \
--model SwiftFormerTemporal_XS \
--batch-size 32 \
--epochs 100 \
--lr 1e-3 \
--output-dir "./temporal_output_multi" \
--num-workers 8 \
--pin-mem \
"$@"