KU Leuven/UHasselt Tier-2 High Performance Computing Infrastructure (VSC)

NB: You will need an account to use the HPC cluster to run the pipeline.

  1. Install Nextflow on the cluster
conda create --name nf-core python=3.12 nf-core nextflow
Note

A Nextflow module is available and can be loaded with module load Nextflow, but it does not support plugins, so it is not recommended.
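
Once the environment is created, it is worth confirming that both tools resolve from the conda environment before going further (a quick check, assuming conda is initialised in your login shell):

conda activate nf-core
nextflow -version
nf-core --version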

  2. Set up the environment variables in ~/.bashrc or ~/.bash_profile:
export SLURM_ACCOUNT="<your-credential-account>"
 
# Needed for running Nextflow jobs
export NXF_HOME="$VSC_SCRATCH/.nextflow"
export NXF_WORK="$VSC_SCRATCH/work"
 
# Needed for running Apptainer containers
export APPTAINER_CACHEDIR="$VSC_SCRATCH/.apptainer/cache"
export APPTAINER_TMPDIR="$VSC_SCRATCH/.apptainer/tmp"
export NXF_CONDA_CACHEDIR="$VSC_SCRATCH/miniconda3/envs"
 
# Optional tower key
# export TOWER_ACCESS_TOKEN="<your_tower_access_token>"
# export NXF_VER="<version>"      # make sure it's at least 24.10.1
Warning

The current config is set up with array jobs. Make sure your Nextflow version is >= 24.10.1 (see array jobs in Nextflow). You can pin the version with:

export NXF_VER=24.10.1
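
After editing ~/.bashrc, reload it and optionally pre-create the cache and work directories so the first run does not have to (a minimal sketch, assuming $VSC_SCRATCH is defined on the login node):

source ~/.bashrc
mkdir -p "$NXF_HOME" "$NXF_WORK" "$NXF_CONDA_CACHEDIR"
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
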
  3. Make the submission script.

NB: Go to the cluster you want to run the pipeline on. You can check which cluster has the most idle resources with the following command: sinfo --cluster wice|genius.

$ more job.slurm
#!/bin/bash -l
#SBATCH --account=...
#SBATCH --chdir=....
#SBATCH --partition=batch_long
#SBATCH --nodes="1"
#SBATCH --ntasks-per-node="1"
 
# module load Nextflow # does not support plugins
conda activate nf-core
 
nextflow run <pipeline> -profile vsc_kul_uhasselt,<CLUSTER> <Add your other parameters>

NB: You have to specify your credential account by setting export SLURM_ACCOUNT="<your-credential-account>", otherwise the jobs will fail!
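
A quick guard before submitting catches a missing account setting early (plain bash, nothing site-specific assumed):

echo "${SLURM_ACCOUNT:?SLURM_ACCOUNT is not set - export it in your ~/.bashrc}"  # aborts with an error if the variable is unset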

The available <CLUSTER> options are:

  • genius
  • genius_gpu
  • wice
  • wice_gpu
  • superdome

NB: The vsc_kul_uhasselt profile is based on a selected set of SLURM partitions. The profile will, to the best of its ability, select the most appropriate partition for each job. Modules with a label containing gpu will be allocated to a GPU partition even when the 'normal' genius profile is selected. Select the genius_gpu or wice_gpu profile to force jobs onto a GPU partition. NB: If the module does not have accelerator set, the number of GPUs will be determined from the requested resources.
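
To see which GPU partitions currently have idle nodes and which GRES types they expose, a standard sinfo query can help (partition names taken from the config below; the format flags are plain Slurm):

sinfo --clusters=wice --partition=gpu_a100,gpu_h100 -o "%P %G %D %t"
sinfo --clusters=genius --partition=gpu_v100,gpu_p100 -o "%P %G %D %t"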

Use the --cluster option to specify the cluster you intend to use when submitting the job:

sbatch --cluster=wice|genius job.slurm 
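
For example, to submit on wice and then follow the head job plus the (array) jobs Nextflow spawns, something along these lines should work (standard Slurm commands):

sbatch --clusters=wice job.slurm
squeue --clusters=wice -u "$USER"    # head job and the jobs it submits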

All of the intermediate files required to run the pipeline will be stored in the work/ directory. It is recommended to delete this directory after the pipeline has finished successfully because it can get quite large, and all of the main output files will be saved in the results/ directory anyway.
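
Instead of removing work/ by hand, Nextflow's built-in clean command can be used from the launch directory; the -n flag previews what would be deleted for the most recent run:

nextflow clean -n    # dry run: list what would be removed
nextflow clean -f    # actually remove it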

Config file

See config file on GitHub

vsc_kul_uhasselt.config
// Default to /tmp directory if $VSC_SCRATCH scratch env is not available,
// see: https://github.com/nf-core/configs?tab=readme-ov-file#adding-a-new-config
scratch_dir   = System.getenv("VSC_SCRATCH") ?: "/tmp"
tier1_project = System.getenv("SLURM_ACCOUNT") ?: null
 
// Perform work directory cleanup when the run has successfully completed
// cleanup = true
 
// Reduce the job submission rate to about 50 per minute so the scheduler isn't bombarded with jobs
// Limit queueSize to keep job rate under control and avoid timeouts
executor {
    submitRateLimit = '50/1min'
    queueSize = 50
    exitReadTimeout = "10min"
}
 
// Add backoff strategy to catch cluster timeouts and proper symlinks of files in scratch to the work directory
process {
    executor      = 'slurm'
    stageInMode   = "symlink"
    stageOutMode  = "rsync"
    errorStrategy = { sleep(Math.pow(2, task.attempt ?: 1) * 200 as long); return 'retry' }
    maxRetries    = 3
    array         = 30
}
 
// Specify that singularity should be used and where the cache dir will be for the images
singularity {
    enabled     = true
    autoMounts  = true
    cacheDir    = "$scratch_dir/.singularity"
    pullTimeout = "30 min"
}
 
params {
    config_profile_contact     = 'GitHub: @Joon-Klaps - Email: joon.klaps@kuleuven.be'
    config_profile_url         = 'https://docs.vscentrum.be/en/latest/index.html'
}
 
env {
    APPTAINER_TMPDIR="$scratch_dir/.apptainer/tmp"
    APPTAINER_CACHEDIR="$scratch_dir/.apptainer/cache"
}
 
// AWS maximum retries for errors (This way the pipeline doesn't fail if the download fails one time)
aws {
    maxErrorRetry = 3
}
 
// Define profiles for each cluster
profiles {
    genius {
        params.config_profile_description = 'genius profile for use on the genius cluster of the VSC HPC.'
 
        process {
            // 768 - 65 so 65GB for overhead, max is 720000MB
            resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h ]
            beforeScript = 'module load cluster/genius'
            clusterOptions = { "--clusters=genius --account=$tier1_project" }
 
            queue = {
                task.memory >= 175.GB ?
                    (task.time >= 72.h ? 'dedicated_big_bigmem,dedicated_big_batch,bigmem_long' : 'bigmem') :
                    (task.time >= 72.h ? 'batch_long' : 'batch')
            }
 
            withLabel: '.*gpu.*'{
                resourceLimits         = [ memory: 703.GB, cpus: 36 , time: 168.h ]
                apptainer.runOptions   = '--containall --cleanenv --nv'
                singularity.runOptions = '--containall --cleanenv --nv'
                clusterOptions         = {
                    // suggested to use 9 cpus per gpu
                    def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int)
                    "--gres=gpu:${gpus} --clusters=genius --account=$tier1_project"
                }
 
                queue = {
                    task.memory >= 175.GB ?
                        (task.time >= 72.h ? 'gpu_v100_long' : 'gpu_v100') :
                        (task.time >= 72.h ? 'gpu_p100_long,amd_long' : 'gpu_p100,amd')
                }
            }
        }
    }
 
 
    genius_gpu {
        params.config_profile_description = 'genius_gpu profile for use on the genius cluster of the VSC HPC.'
        apptainer.runOptions              = '--containall --cleanenv --nv'
        singularity.runOptions            = '--containall --cleanenv --nv'
 
        process {
            // 768 - 65 so 65GB for overhead, max is 720000MB
            resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h]
            beforeScript   = 'module load cluster/genius'
            clusterOptions = {
                def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int)
                "--gres=gpu:${gpus} --clusters=genius --account=$tier1_project"
            }
 
            queue = {
                    task.memory >= 175.GB ?
                        (task.time >= 72.h ? 'gpu_v100_long' : 'gpu_v100') :
                        (task.time >= 72.h ? 'gpu_p100_long,amd_long' : 'gpu_p100,amd')
            }
        }
    }
 
    wice {
        params.config_profile_description = 'wice profile for use on the Wice cluster of the VSC HPC.'
 
        process {
            // max is 2016000
            resourceLimits = [ memory: 1968.GB, cpus: 72, time: 168.h ]
            clusterOptions = { "--clusters=wice --account=$tier1_project"}
            beforeScript   = 'module load cluster/wice'
 
            queue = {
                task.memory >= 239.GB ?
                    (task.time >= 72.h ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
                    (task.time >= 72.h ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake')
            }
 
            withLabel: '.*gpu.*'{
                resourceLimits         = [ memory: 703.GB, cpus: 64, time: 168.h ]
                apptainer.runOptions   = '--containall --cleanenv --nv'
                singularity.runOptions = '--containall --cleanenv --nv'
                clusterOptions         = {
                    // suggested to use 16 cpus per gpu
                    def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
                    "--gres=gpu:${gpus} --clusters=wice --account=$tier1_project"
                }
 
                queue = {
                    task.memory >= 239.GB ?
                        (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
                        (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
                }
            }
        }
    }
 
 
    wice_gpu {
        params.config_profile_description = 'wice_gpu profile for use on the Wice cluster of the VSC HPC.'
        apptainer.runOptions              = '--containall --cleanenv --nv'
        singularity.runOptions            = '--containall --cleanenv --nv'
 
        process {
            // 768 - 65 so 65GB for overhead, max is 720000MB
            resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
            beforeScript   = 'module load cluster/wice'
            clusterOptions = {
                def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
                "--gres=gpu:${gpus} --clusters=wice --account=$tier1_project"
            }
 
            queue = {
                task.memory >= 239.GB ?
                    (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
                    (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
            }
        }
    }
 
    superdome {
        params.config_profile_description = 'superdome profile for use on the genius cluster of the VSC HPC.'
 
        process {
            clusterOptions = {"--clusters=genius --account=$tier1_project"}
            beforeScript   = 'module load cluster/genius/superdome'
            // 6000 - 228 so 228GB for overhead, max is 5910888MB
            resourceLimits = [ memory: 5772.GB, cpus: 14, time: 168.h]
 
            queue = { task.time <= 72.h ? 'superdome' : 'superdome_long' }
        }
    }
}