PSyclone icon indicating copy to clipboard operation
PSyclone copied to clipboard

Applying omp_cpu_trans to the files excluded from the omp_gpu_trans

Open kaanolgu opened this issue 1 year ago • 1 comments

Would this combined omp_cpu_trans and omp_gpu_trans file work? I assume it will, but I haven't tested it yet.

from utils import (
    insert_explicit_loop_parallelism, normalise_loops, add_profiling,
    enhance_tree_information, OTHER_ISSUES, DONT_PARALLELISE)
from psyclone.psyir.nodes import (
    Loop, Routine, Directive, Assignment, OMPAtomicDirective)
from psyclone.psyir.transformations import OMPTargetTrans
from psyclone.transformations import (
    OMPLoopTrans, OMPDeclareTargetTrans, TransformationError)

# When True, trans() calls add_profiling() on the children of every routine
# before it applies any other transformation to that routine.
PROFILING_ENABLED = False

# List of all files that psyclone will skip processing
# NOTE(review): nothing in the visible part of this script reads
# FILES_TO_SKIP; presumably it is consumed by whatever drives this script -
# confirm.
# NOTE(review): "stpctl.f90" is listed here although trans() also contains a
# special case for that file name - confirm which of the two is intended.
FILES_TO_SKIP = OTHER_ISSUES + [
    "asminc.f90",
    "trosk.f90",    # TODO #1254
    "vremap.f90",   # Bulk assignment of a structure component
    "lib_mpp.f90",  # Compiler Error: Illegal substring expression
    "prtctl.f90",   # Compiler Error: Illegal substring expression
    "sbcblk.f90",   # Compiler Error: Vector expression used where scalar
                    # expression required
    "diadct.f90",   # Compiler Error: Wrong number of arguments in reshape
    "stpctl.f90",
    "lbcnfd.f90",
    "flread.f90",
    "sedini.f90",
    "diu_bulk.f90",  # Linking undefined reference
    "bdyini.f90",    # Linking undefined reference
    "trcrad.f90",
]

# List of files that will use CPU transformations instead of GPU
# trans() compares psyir.name against these entries to choose between the CPU
# and the GPU code path; the two names below are placeholders.
LIST_OF_CPU_TRANS = ["foo.f90", "bar.f90"]  # Example

def _offload_stpctl_loops(subroutine, omp_loop_trans, omp_target_trans):
    ''' Forcefully apply the loop and target transformations to every loop
    of the given routine that is not already inside a directive. A loop that
    contains exactly one assignment, whose left-hand side is "zmax", also
    gets that assignment wrapped in an OpenMP atomic directive when it is a
    valid atomic statement.

    :param subroutine: the routine whose loops are transformed.
    :type subroutine: :py:class:`psyclone.psyir.nodes.Routine`
    :param omp_loop_trans: the loop transformation to apply to each loop.
    :type omp_loop_trans: \
        :py:class:`psyclone.transformations.OMPLoopTrans`
    :param omp_target_trans: the transformation applied around each \
        successfully transformed loop.
    :type omp_target_trans: \
        :py:class:`psyclone.psyir.transformations.OMPTargetTrans`

    '''
    for loop in subroutine.walk(Loop):
        # Loops that already live under a directive are left alone.
        if loop.ancestor(Directive):
            continue

        # "force" is handed to the transformation as-is (presumably it
        # relaxes its validation checks - confirm); loops that are still
        # rejected are simply skipped.
        try:
            omp_loop_trans.apply(loop, options={"force": True})
        except TransformationError:
            continue

        # Two levels above the loop is presumably the directive that was
        # just created around it - confirm against OMPLoopTrans.
        omp_target_trans.apply(loop.parent.parent)

        assigns = loop.walk(Assignment)
        if len(assigns) != 1:
            continue
        stmt = assigns[0]
        if stmt.lhs.symbol.name != "zmax":
            continue
        if OMPAtomicDirective.is_valid_atomic_statement(stmt):
            # Move the statement inside a new atomic directive, which is
            # then appended to the statement's former parent.
            parent = stmt.parent
            atomic = OMPAtomicDirective()
            atomic.children[0].addchild(stmt.detach())
            parent.addchild(atomic)


def trans(psyir):
    ''' Add OpenMP Target and Loop directives to all loops, for GPU offloading,
    or apply CPU OpenMP threading directives depending on the file.

    Files listed in LIST_OF_CPU_TRANS take the CPU path ("paralleldo" loop
    directives, no enclosing region transformation). Every other file takes
    the GPU path ("loop" directives inside target regions), except files
    whose name starts with "obs_", which are returned completely unmodified.

    :param psyir: the PSyIR of the provided file.
    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`

    '''
    use_cpu = psyir.name in LIST_OF_CPU_TRANS

    # The "obs_" files are not offloaded. Bail out before anything touches
    # the tree so that a skipped file is not left half-transformed (loops
    # normalised, profiling hooks added) with no directives in it.
    if not use_cpu and psyir.name.startswith("obs_"):
        print(f"Skipping file: {psyir.name}")
        return

    omp_loop_trans = OMPLoopTrans(omp_schedule="static")
    if use_cpu:
        # Apply CPU transformations: no region transformation is used, only
        # the loop transformation with the "paralleldo" directive.
        omp_region_trans = None
        omp_loop_trans.omp_directive = "paralleldo"
        print(f"Applying CPU transformations to file: {psyir.name}")
    else:
        # Apply GPU transformations
        omp_region_trans = OMPTargetTrans()
        omp_loop_trans.omp_directive = "loop"
        print(f"Applying GPU transformations to file: {psyir.name}")

    # Neither of these depends on the routine, so evaluate them once.
    gpu_only = not use_cpu
    parallelise = psyir.name not in DONT_PARALLELISE

    for subroutine in psyir.walk(Routine):

        if PROFILING_ENABLED:
            add_profiling(subroutine.children)

        enhance_tree_information(subroutine)

        # The hoisting and intrinsic-loopification steps are only enabled on
        # the GPU path.
        normalise_loops(
            subroutine,
            hoist_local_arrays=gpu_only,
            convert_array_notation=True,
            loopify_array_intrinsics=gpu_only,
            convert_range_loops=True,
            hoist_expressions=gpu_only
        )

        if use_cpu:
            if parallelise:
                insert_explicit_loop_parallelism(
                    subroutine,
                    region_directive_trans=omp_region_trans,
                    loop_directive_trans=omp_loop_trans,
                    collapse=False,
                    privatise_arrays=psyir.name != "ldftra.f90",
                )
            continue

        # GPU case. stpctl.f90 is a special case: its loops are forced to
        # parallelise and the generic insertion below is not used for it.
        if psyir.name == "stpctl.f90":
            _offload_stpctl_loops(
                subroutine, omp_loop_trans, omp_region_trans)
            continue

        if parallelise:
            insert_explicit_loop_parallelism(
                subroutine,
                region_directive_trans=omp_region_trans,
                loop_directive_trans=omp_loop_trans,
                collapse=True,
            )
                

kaanolgu avatar Oct 24 '24 21:10 kaanolgu

@addy419 @sergisiso regarding the topic discussed in the Mattermost chat

kaanolgu avatar Oct 24 '24 21:10 kaanolgu

I'm going to close this one as it's not really a PSyclone issue per se; it's more of a usability question. We do now automatically add OMP threading where we fail to offload (see the examples/nemo/scripts).

arporter avatar Jan 21 '25 08:01 arporter

Thanks Andy, since the script is not yet in the master branch, I will put a reference here:

https://github.com/stfc/PSyclone/blob/7179f0e12469bfbd8799628444048e7237f24835/examples/nemo/scripts/omp_gpu_trans.py#L199-L213

sergisiso avatar Jan 21 '25 10:01 sergisiso