Sage Bionetworks Global Configuration

To use this custom configuration, run the pipeline with -profile sage. This will download and load the sage.config file, which contains a number of optimizations relevant to Sage employees running workflows on AWS (e.g. using Nextflow Tower). This profile will also load any applicable pipeline-specific configuration.
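For example, to launch an nf-core pipeline with this profile (nf-core/rnaseq and the parameter values here are just an illustration; substitute your own pipeline and inputs):

nextflow run nf-core/rnaseq -profile sage --input samplesheet.csv --genome GRCh38 --outdir results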

This global configuration includes the following tweaks:

  • Update the default value for igenomes_base to s3://sage-igenomes
  • Enable retries for all failures
  • Allow pending jobs to finish if the number of retries is exhausted
  • Increase resource allocations for specific resource-related exit codes
  • Optimize resource allocations to better “fit” EC2 instance types
  • Slow the increase in the number of allocated CPU cores on retries
  • Increase the default time limits because we run pipelines on AWS
  • Increase the amount of time allowed for file transfers
  • Improve reliability of file transfers with retries and reduced concurrency
  • Define the check_max() function, which is missing in Sarek v2
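To make the retry behavior concrete: under the allocations in the config below, a process_medium task (8 CPUs, 32 GB, 48 h) that is repeatedly killed with a resource-related exit code such as 137 (out of memory) would request 32 GB, 64 GB, 96 GB, and then 128 GB (the max_memory ceiling) on successive attempts, while its CPU request would grow at half that rate (8, 8, 16, 16) thanks to the slow_factor argument of factor(). All other failures are retried with unchanged allocations.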

Additional information about iGenomes

The following iGenomes prefixes have been copied from s3://ngi-igenomes/ (eu-west-1) to s3://sage-igenomes (us-east-1). See this script for more information. The sage-igenomes S3 bucket has been configured to be openly accessible, but files cannot be downloaded out of us-east-1 to avoid egress charges. You can check the conf/igenomes.config file in each nf-core pipeline to figure out the mapping between genome IDs (i.e. for --genome) and iGenomes prefixes (example); a sketch of such an entry follows the list below.

  • Human Genome Builds
    • Homo_sapiens/Ensembl/GRCh37
    • Homo_sapiens/GATK/GRCh37
    • Homo_sapiens/UCSC/hg19
    • Homo_sapiens/GATK/GRCh38
    • Homo_sapiens/NCBI/GRCh38
    • Homo_sapiens/UCSC/hg38
  • Mouse Genome Builds
    • Mus_musculus/Ensembl/GRCm38
    • Mus_musculus/UCSC/mm10
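As a minimal sketch of what a conf/igenomes.config entry looks like (abridged from the typical nf-core layout; attribute names and index paths vary by pipeline, so treat this as illustrative):

params {
    genomes {
        'GRCh38' {
            fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
            // ... further attributes (bwa, gtf, bed12, etc.) follow the same prefix
        }
    }
}

With igenomes_base set to s3://sage-igenomes/igenomes by this profile, --genome GRCh38 therefore resolves to the Homo_sapiens/NCBI/GRCh38 prefix in the us-east-1 mirror.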

Config file

See config file on GitHub

sage.config
// Config profile metadata
params {
    config_profile_description = 'The Sage Bionetworks Nextflow Config Profile'
    config_profile_contact = 'Bruno Grande (@BrunoGrandePhD)'
    config_profile_url = 'https://github.com/Sage-Bionetworks-Workflows'
}
 
// Leverage us-east-1 mirror of select human and mouse genomes
params {
    igenomes_base   = 's3://sage-igenomes/igenomes'
    cpus            = 4
    max_cpus        = 32
    max_memory      = 128.GB
    max_time        = 240.h
    single_cpu_mem  = 6.GB
}
 
// Enable retries globally for all failures
process {
    maxErrors     = '-1'
    maxRetries    = 5
    errorStrategy = { task.attempt <= 5 ? 'retry' : 'finish' }
}
 
// Increase time limit to allow file transfers to finish
// The default is 12 hours, which results in timeouts
threadPool.FileTransfer.maxAwait = '24 hour'
 
// Configure Nextflow to be more reliable on AWS
aws {
    region = "us-east-1"
    client {
        uploadMaxThreads = 4
    }
    batch {
        retryMode = 'built-in'
        maxParallelTransfers = 1
        maxTransferAttempts = 10
        delayBetweenAttempts = '60 sec'
    }
}
 
// Adjust default resource allocations (see `../docs/sage.md`)
process {
 
    cpus     = { check_max( 1     * factor(task, 2),  'cpus'   ) }
    memory   = { check_max( 6.GB  * factor(task, 1),  'memory' ) }
    time     = { check_max( 24.h  * factor(task, 1),  'time'   ) }
 
    // Process-specific resource requirements
    withLabel: 'process_single' {
        cpus   = { check_max( 1     * factor(task, 2),  'cpus'   ) }
        memory = { check_max( 6.GB  * factor(task, 1),  'memory' ) }
        time   = { check_max( 24.h  * factor(task, 1),  'time'   ) }
    }
    withLabel: 'process_low' {
        cpus   = { check_max( 2     * factor(task, 2),  'cpus'   ) }
        memory = { check_max( 12.GB * factor(task, 1),  'memory' ) }
        time   = { check_max( 24.h  * factor(task, 1),  'time'   ) }
    }
    withLabel: 'process_medium' {
        cpus   = { check_max( 8     * factor(task, 2),  'cpus'   ) }
        memory = { check_max( 32.GB * factor(task, 1),  'memory' ) }
        time   = { check_max( 48.h  * factor(task, 1),  'time'   ) }
    }
    withLabel: 'process_high' {
        cpus   = { check_max( 16    * factor(task, 2),  'cpus'   ) }
        memory = { check_max( 64.GB * factor(task, 1),  'memory' ) }
        time   = { check_max( 96.h  * factor(task, 1),  'time'   ) }
    }
    withLabel: 'process_long' {
        time   = { check_max( 96.h  * factor(task, 1),  'time'   ) }
    }
    withLabel: 'process_high_memory|memory_max' {
        memory = { check_max( 128.GB * factor(task, 1), 'memory' ) }
    }
    withLabel: 'cpus_max' {
        cpus   = { check_max( 32     * factor(task, 2), 'cpus'   ) }
    }
 
}
 
 
// Function to finely control the increase of the resource allocation
def factor(task, slow_factor = 1) {
    if ( task.exitStatus in [143,137,104,134,139,247] ) {
        return Math.ceil( task.attempt / slow_factor) as int
    } else {
        return 1 as int
    }
}
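// Worked example (illustrative): with slow_factor = 2, a task whose
// previous attempt failed with a resource-related exit code gets
// factors of 1, 1, 2, 2, 3 across attempts 1-5, i.e. its allocation
// doubles only every other retry. With slow_factor = 1, the factor
// tracks task.attempt directly. Any other failure returns 1, so
// allocations stay flat for non-resource errors.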
 
 
// Function to ensure that resource requirements don't go
// beyond a maximum limit (copied here for Sarek v2)
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}