Skip to content

[aws-ofi-rccl] [package list]

aws-ofi-rccl/17d41cb-cpeGNU-24.03 (aws-ofi-rccl-17d41cb-cpeGNU-24.03.eb)

Install with the EasyBuild-user module:

eb aws-ofi-rccl-17d41cb-cpeGNU-24.03.eb -r
To access the module help after installation, and to see for which software stacks and partitions the module is installed, use module spider aws-ofi-rccl/17d41cb-cpeGNU-24.03.

EasyConfig:

# Generic EasyBuild easyblock; the preconfigopts / configopts defined later in
# this file are the options it consumes.
easyblock = 'ConfigureMake'

name = 'aws-ofi-rccl'
# The version is the abbreviated (7-character) hash of the commit pinned in
# `sources`. Upstream release notes for the cxi branch:
# https://github.com/ROCmSoftwarePlatform/aws-ofi-rccl/blob/cxi/RELEASENOTES.md
version = '17d41cb'

homepage = 'https://github.com/ROCmSoftwarePlatform/aws-ofi-rccl'

# One-line summary; the long form lives in `description`.
whatis = [
    'Description: AWS OFI RCCL is a plug-in which enables EC2 developers '
    'to use libfabric as a network provider.',
]

# Long help text. It is spelled out line by line so that the leading newline
# and the trailing space at the end of most wrapped lines are explicit.
description = (
    "\n"
    "Machine learning frameworks running on top of AMD GPUs use a library called \n"
    "RCCL which provides standard collective communication routines for an arbitrary \n"
    "number of GPUs installed across single or multiple nodes.\n"
    "\n"
    "This module implements a plug-in which maps RCCLs connection-oriented transport \n"
    "APIs to libfabric's connection-less reliable interface. This allows RCCL \n"
    "applications to take benefit of libfabric's transport layer services like \n"
    "reliable message support and operating system bypass.\n"
)

# Licence file on the upstream cxi branch.
software_license_urls = [
    'https://github.com/ROCmSoftwarePlatform/aws-ofi-rccl/blob/cxi/LICENSE',
]

# cpeGNU toolchain: the plugin build needs access to the MPI directory.
toolchain = {'name': 'cpeGNU', 'version': '24.03'}

# The source is fetched from git at a pinned commit; `filename` is the name
# given to the resulting tarball. The full hash below starts with the
# abbreviated hash used as `version`.
sources = [
    {
        'filename': '%(name)s-%(version)s.tar.gz',
        'git_config': {
            'url': 'https://github.com/ROCmSoftwarePlatform',
            'repo_name': '%(name)s',
            'commit': '17d41cbf5618536c4b1076d29748416ab307040f',
        },
    },
]

# Build-time only. Tuple layout follows the EasyBuild dependency spec:
# (name, version, versionsuffix, toolchain); the version tracks the toolchain
# version through the %(toolchain_version)s template.
builddependencies = [('buildtools', '%(toolchain_version)s', '', True)]

# ROCm is referenced as an external module (name/version given as a module
# name plus the EXTERNAL_MODULE marker) rather than as an EasyBuild-built
# dependency.
dependencies = [('rocm/6.0.3', 'EXTERNAL_MODULE')]

# Shell prefix for the configure command: run ./autogen.sh first and only
# continue if it succeeds.
preconfigopts = ' ./autogen.sh && '

# libfabric: the version sub-directory under /opt/cray/libfabric is resolved
# by the shell at build time via pkg-config.
configopts = ' --with-libfabric=/opt/cray/libfabric/$(pkg-config --modversion libfabric) '
# HIP and RCCL are looked up below $ROCM_PATH
# (presumably exported by the rocm external module - confirm).
configopts += ' --with-hip=$ROCM_PATH/hip '
configopts += ' --with-rccl=$ROCM_PATH/rccl '

# Post-install sanity check: the plugin shared library must exist, and the
# installation must contain both a bin and a lib directory.
sanity_check_paths = {
    'dirs': ['bin', 'lib'],
    'files': ['lib/librccl-net.so.0.0.0'],
}

# Module category.
moduleclass = 'devel'

[aws-ofi-rccl] [package list]