Sample Files
This is a quick sample of the generated files.
Singularity
We have:
jobs/{name}/{job_id}.sbatch: The sbatch file for each job.
jobs/{name}/run.sh: A script that submits all jobs at once.
import slurmjobs
jobs = slurmjobs.Singularity(
'python train.py', name='sing', backup=False,
email='me@nyu.edu', n_gpus=2)
run_script, job_paths = jobs.generate([
('model', ['AVE', 'AVOL']),
('audio_channels', [1, 2]),
], epochs=500)
slurmjobs.util.summary(run_script, job_paths)
Generated 4 job scripts:
jobs/sing/sing,model-AVE,audio_channels-1.sbatch
jobs/sing/sing,model-AVE,audio_channels-2.sbatch
jobs/sing/sing,model-AVOL,audio_channels-1.sbatch
jobs/sing/sing,model-AVOL,audio_channels-2.sbatch
To submit all jobs, run:
. jobs/sing/run.sh
jobs/sing/sing,model-AVE,audio_channels-1.sbatch
#!/bin/bash
#SBATCH --job-name=sing,model-AVE,audio_channels-1
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/sing/slurm/slurm_%j__sing,model-AVE,audio_channels-1.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: sing,model-AVE,audio_channels-1
# Args:
# {'audio_channels': 1,
# 'epochs': 500,
# 'job_id': 'sing,model-AVE,audio_channels-1',
# 'model': 'AVE'}
#
#########################
###### ( hop into the singularity o_O )
singularity exec \
--nv \
--overlay overlay-5GB-200K.ext3:ro \
/scratch/work/public/singularity/cuda11.0-cudnn8-devel-ubuntu18.04.sif \
/bin/bash << EOF
echo "@: entered singularity container"
source /ext3/env.sh
python train.py \
--model=AVE \
--audio_channels=1 \
--epochs=500 \
--job_id=sing,model-AVE,audio_channels-1
###### (escape from the singularity @o@ )
echo "@: exiting singularity container"
exit 0;
EOF
jobs/sing/sing,model-AVE,audio_channels-2.sbatch
#!/bin/bash
#SBATCH --job-name=sing,model-AVE,audio_channels-2
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/sing/slurm/slurm_%j__sing,model-AVE,audio_channels-2.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: sing,model-AVE,audio_channels-2
# Args:
# {'audio_channels': 2,
# 'epochs': 500,
# 'job_id': 'sing,model-AVE,audio_channels-2',
# 'model': 'AVE'}
#
#########################
###### ( hop into the singularity o_O )
singularity exec \
--nv \
--overlay overlay-5GB-200K.ext3:ro \
/scratch/work/public/singularity/cuda11.0-cudnn8-devel-ubuntu18.04.sif \
/bin/bash << EOF
echo "@: entered singularity container"
source /ext3/env.sh
python train.py \
--model=AVE \
--audio_channels=2 \
--epochs=500 \
--job_id=sing,model-AVE,audio_channels-2
###### (escape from the singularity @o@ )
echo "@: exiting singularity container"
exit 0;
EOF
jobs/sing/sing,model-AVOL,audio_channels-1.sbatch
#!/bin/bash
#SBATCH --job-name=sing,model-AVOL,audio_channels-1
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/sing/slurm/slurm_%j__sing,model-AVOL,audio_channels-1.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: sing,model-AVOL,audio_channels-1
# Args:
# {'audio_channels': 1,
# 'epochs': 500,
# 'job_id': 'sing,model-AVOL,audio_channels-1',
# 'model': 'AVOL'}
#
#########################
###### ( hop into the singularity o_O )
singularity exec \
--nv \
--overlay overlay-5GB-200K.ext3:ro \
/scratch/work/public/singularity/cuda11.0-cudnn8-devel-ubuntu18.04.sif \
/bin/bash << EOF
echo "@: entered singularity container"
source /ext3/env.sh
python train.py \
--model=AVOL \
--audio_channels=1 \
--epochs=500 \
--job_id=sing,model-AVOL,audio_channels-1
###### (escape from the singularity @o@ )
echo "@: exiting singularity container"
exit 0;
EOF
jobs/sing/sing,model-AVOL,audio_channels-2.sbatch
#!/bin/bash
#SBATCH --job-name=sing,model-AVOL,audio_channels-2
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/sing/slurm/slurm_%j__sing,model-AVOL,audio_channels-2.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: sing,model-AVOL,audio_channels-2
# Args:
# {'audio_channels': 2,
# 'epochs': 500,
# 'job_id': 'sing,model-AVOL,audio_channels-2',
# 'model': 'AVOL'}
#
#########################
###### ( hop into the singularity o_O )
singularity exec \
--nv \
--overlay overlay-5GB-200K.ext3:ro \
/scratch/work/public/singularity/cuda11.0-cudnn8-devel-ubuntu18.04.sif \
/bin/bash << EOF
echo "@: entered singularity container"
source /ext3/env.sh
python train.py \
--model=AVOL \
--audio_channels=2 \
--epochs=500 \
--job_id=sing,model-AVOL,audio_channels-2
###### (escape from the singularity @o@ )
echo "@: exiting singularity container"
exit 0;
EOF
jobs/sing/run.sh
#########################
#
# Job Batch: sing
# Params:
# [
# ('model', ['AVE', 'AVOL']),
# ('audio_channels', [1, 2]),
# ]
#
#########################
sbatch "jobs/sing/sing,model-AVE,audio_channels-1.sbatch"
sbatch "jobs/sing/sing,model-AVE,audio_channels-2.sbatch"
sbatch "jobs/sing/sing,model-AVOL,audio_channels-1.sbatch"
sbatch "jobs/sing/sing,model-AVOL,audio_channels-2.sbatch"
Slurm
We have:
jobs/{name}/{job_id}.sbatch: The sbatch file for each job.
jobs/{name}/run.sh: A script that submits all jobs at once.
import slurmjobs
jobs = slurmjobs.Slurm(
'python train.py', name='slurm', backup=False,
email='me@nyu.edu', n_gpus=2)
run_script, job_paths = jobs.generate([
('model', ['AVE', 'AVOL']),
('audio_channels', [1, 2]),
], epochs=500)
slurmjobs.util.summary(run_script, job_paths)
Generated 4 job scripts:
jobs/slurm/slurm,model-AVE,audio_channels-1.sbatch
jobs/slurm/slurm,model-AVE,audio_channels-2.sbatch
jobs/slurm/slurm,model-AVOL,audio_channels-1.sbatch
jobs/slurm/slurm,model-AVOL,audio_channels-2.sbatch
To submit all jobs, run:
. jobs/slurm/run.sh
jobs/slurm/slurm,model-AVE,audio_channels-1.sbatch
#!/bin/bash
#SBATCH --job-name=slurm,model-AVE,audio_channels-1
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/slurm/slurm/slurm_%j__slurm,model-AVE,audio_channels-1.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: slurm,model-AVE,audio_channels-1
# Args:
# {'audio_channels': 1,
# 'epochs': 500,
# 'job_id': 'slurm,model-AVE,audio_channels-1',
# 'model': 'AVE'}
#
#########################
##### Load Modules
module purge
python train.py \
--model=AVE \
--audio_channels=1 \
--epochs=500 \
--job_id=slurm,model-AVE,audio_channels-1
jobs/slurm/slurm,model-AVE,audio_channels-2.sbatch
#!/bin/bash
#SBATCH --job-name=slurm,model-AVE,audio_channels-2
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/slurm/slurm/slurm_%j__slurm,model-AVE,audio_channels-2.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: slurm,model-AVE,audio_channels-2
# Args:
# {'audio_channels': 2,
# 'epochs': 500,
# 'job_id': 'slurm,model-AVE,audio_channels-2',
# 'model': 'AVE'}
#
#########################
##### Load Modules
module purge
python train.py \
--model=AVE \
--audio_channels=2 \
--epochs=500 \
--job_id=slurm,model-AVE,audio_channels-2
jobs/slurm/slurm,model-AVOL,audio_channels-1.sbatch
#!/bin/bash
#SBATCH --job-name=slurm,model-AVOL,audio_channels-1
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/slurm/slurm/slurm_%j__slurm,model-AVOL,audio_channels-1.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: slurm,model-AVOL,audio_channels-1
# Args:
# {'audio_channels': 1,
# 'epochs': 500,
# 'job_id': 'slurm,model-AVOL,audio_channels-1',
# 'model': 'AVOL'}
#
#########################
##### Load Modules
module purge
python train.py \
--model=AVOL \
--audio_channels=1 \
--epochs=500 \
--job_id=slurm,model-AVOL,audio_channels-1
jobs/slurm/slurm,model-AVOL,audio_channels-2.sbatch
#!/bin/bash
#SBATCH --job-name=slurm,model-AVOL,audio_channels-2
#SBATCH --mail-type=ALL
#SBATCH --mail-user=me@nyu.edu
#SBATCH --output=jobs/slurm/slurm/slurm_%j__slurm,model-AVOL,audio_channels-2.log
#SBATCH --time=3-0
#SBATCH --mem=48GB
#SBATCH --gres=gpu:2
#########################
#
# Job: slurm,model-AVOL,audio_channels-2
# Args:
# {'audio_channels': 2,
# 'epochs': 500,
# 'job_id': 'slurm,model-AVOL,audio_channels-2',
# 'model': 'AVOL'}
#
#########################
##### Load Modules
module purge
python train.py \
--model=AVOL \
--audio_channels=2 \
--epochs=500 \
--job_id=slurm,model-AVOL,audio_channels-2
jobs/slurm/run.sh
#########################
#
# Job Batch: slurm
# Params:
# [
# ('model', ['AVE', 'AVOL']),
# ('audio_channels', [1, 2]),
# ]
#
#########################
sbatch "jobs/slurm/slurm,model-AVE,audio_channels-1.sbatch"
sbatch "jobs/slurm/slurm,model-AVE,audio_channels-2.sbatch"
sbatch "jobs/slurm/slurm,model-AVOL,audio_channels-1.sbatch"
sbatch "jobs/slurm/slurm,model-AVOL,audio_channels-2.sbatch"
Shell
We have:
jobs/{name}/{job_id}.job.sh: The shell file for each job.
jobs/{name}/run.sh: A script that submits all jobs at once.
import slurmjobs
jobs = slurmjobs.Shell(
'python train.py', name='shell', backup=False,
email='me@nyu.edu', n_gpus=2)
run_script, job_paths = jobs.generate([
('model', ['AVE', 'AVOL']),
('audio_channels', [1, 2]),
], epochs=500)
slurmjobs.util.summary(run_script, job_paths)
Error: running this example fails, because n_gpus is not a recognized option for slurmjobs.Shell:
Traceback (most recent call last):
File "/home/docs/checkouts/readthedocs.org/user_builds/slurmjobs/checkouts/latest/docs/_tmp.py", line 5, in <module>
email='me@nyu.edu', n_gpus=2)
File "/home/docs/checkouts/readthedocs.org/user_builds/slurmjobs/envs/latest/lib/python3.7/site-packages/slurmjobs-1.0.0-py3.7.egg/slurmjobs/core.py", line 121, in __init__
f"Unrecognized options: {wrong_options}. If you extended your template to use additional options, "
TypeError: Unrecognized options: {'n_gpus'}. If you extended your template to use additional options, please give them default values in your Class.options dictionary.