PSyclone
PSyclone copied to clipboard
Applying omp_cpu_trans to the files excluded from the omp_gpu_trans
Would this script, which combines the omp_cpu_trans and omp_gpu_trans files, work? I assume it will, but I haven't tested it yet.
from utils import (
insert_explicit_loop_parallelism, normalise_loops, add_profiling,
enhance_tree_information, OTHER_ISSUES, DONT_PARALLELISE)
from psyclone.psyir.nodes import (
Loop, Routine, Directive, Assignment, OMPAtomicDirective)
from psyclone.psyir.transformations import OMPTargetTrans
from psyclone.transformations import (
OMPLoopTrans, OMPDeclareTargetTrans, TransformationError)
# When True, trans() calls add_profiling() on the children of every routine
# before applying any other transformation.
PROFILING_ENABLED: bool = False
# Files that PSyclone will skip processing altogether. The entries below are
# appended to the OTHER_ISSUES list imported from utils; the trailing comment
# on an entry records the reason for skipping it, where one is known.
FILES_TO_SKIP = OTHER_ISSUES + [
    "asminc.f90",
    "trosk.f90",     # TODO #1254
    "vremap.f90",    # Bulk assignment of a structure component
    "lib_mpp.f90",   # Compiler Error: Illegal substring expression
    "prtctl.f90",    # Compiler Error: Illegal substring expression
    "sbcblk.f90",    # Compiler Error: Vector expression used where scalar
                     # expression required
    "diadct.f90",    # Compiler Error: Wrong number of arguments in reshape
    "stpctl.f90",
    "lbcnfd.f90",
    "flread.f90",
    "sedini.f90",
    "diu_bulk.f90",  # Linking undefined reference
    "bdyini.f90",    # Linking undefined reference
    "trcrad.f90",
]
# Files that trans() handles with the CPU (OpenMP threading) transformations
# instead of the GPU offloading ones. The names below are placeholders.
LIST_OF_CPU_TRANS = [
    "foo.f90",  # Example
    "bar.f90",  # Example
]
def _force_offload_stpctl_loops(subroutine, omp_loop_trans,
                                omp_target_trans):
    ''' Special case for stpctl.f90: force the loop directive onto every
    loop that is not already inside a directive, wrap it in a target region
    and protect a lone assignment to 'zmax' with an atomic directive.

    :param subroutine: the routine whose loops are transformed.
    :type subroutine: :py:class:`psyclone.psyir.nodes.Routine`
    :param omp_loop_trans: transformation that adds the loop directive.
    :param omp_target_trans: transformation that adds the target region.

    '''
    for loop in subroutine.walk(Loop):
        # Skip loops that are already enclosed by a directive
        if loop.ancestor(Directive):
            continue
        try:
            omp_loop_trans.apply(loop, options={"force": True})
        except TransformationError:
            # Best effort: leave loops that cannot be transformed untouched
            continue
        # NOTE(review): after the loop transformation, loop.parent.parent is
        # presumably the newly inserted loop directive - confirm.
        omp_target_trans.apply(loop.parent.parent)
        assigns = loop.walk(Assignment)
        if len(assigns) == 1 and assigns[0].lhs.symbol.name == "zmax":
            stmt = assigns[0]
            if OMPAtomicDirective.is_valid_atomic_statement(stmt):
                # Move the statement inside a new atomic directive that is
                # appended to the statement's former parent.
                parent = stmt.parent
                atomic = OMPAtomicDirective()
                atomic.children[0].addchild(stmt.detach())
                parent.addchild(atomic)


def trans(psyir):
    ''' Add OpenMP Target and Loop directives to all loops, for GPU
    offloading, or apply CPU OpenMP threading directives, depending on
    whether the file name is listed in LIST_OF_CPU_TRANS.

    GPU-path files whose name starts with "obs_" are skipped entirely: the
    check is made before any routine is modified so that a skipped file is
    never left partially transformed.

    :param psyir: the PSyIR of the provided file.
    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`

    '''
    use_cpu = psyir.name in LIST_OF_CPU_TRANS

    # This must happen before the per-routine loop below, otherwise the first
    # routine would already have been profiled and normalised on return.
    if not use_cpu and psyir.name.startswith("obs_"):
        print(f"Skipping file: {psyir.name}")
        return

    omp_loop_trans = OMPLoopTrans(omp_schedule="static")
    if use_cpu:
        # No region transformation is given on the CPU path
        omp_region_trans = None
        omp_loop_trans.omp_directive = "paralleldo"
        print(f"Applying CPU transformations to file: {psyir.name}")
    else:
        omp_region_trans = OMPTargetTrans()
        omp_loop_trans.omp_directive = "loop"
        print(f"Applying GPU transformations to file: {psyir.name}")

    parallelise = psyir.name not in DONT_PARALLELISE

    for subroutine in psyir.walk(Routine):
        if PROFILING_ENABLED:
            add_profiling(subroutine.children)

        enhance_tree_information(subroutine)

        # Hoisting and intrinsic loopification are only requested on the
        # GPU path.
        normalise_loops(
            subroutine,
            hoist_local_arrays=not use_cpu,
            convert_array_notation=True,
            loopify_array_intrinsics=not use_cpu,
            convert_range_loops=True,
            hoist_expressions=not use_cpu,
        )

        if use_cpu:
            if parallelise:
                insert_explicit_loop_parallelism(
                    subroutine,
                    region_directive_trans=omp_region_trans,
                    loop_directive_trans=omp_loop_trans,
                    collapse=False,
                    privatise_arrays=psyir.name != "ldftra.f90",
                )
            continue

        # GPU path from here on
        if psyir.name == "stpctl.f90":
            _force_offload_stpctl_loops(
                subroutine, omp_loop_trans, omp_region_trans)
            continue

        if parallelise:
            insert_explicit_loop_parallelism(
                subroutine,
                region_directive_trans=omp_region_trans,
                loop_directive_trans=omp_loop_trans,
                collapse=True,
            )
@addy419 @sergisiso regarding the topic discussed in the Mattermost chat
I'm going to close this one as it's not really a PSyclone issue per se, more usability. We do now automatically add OMP threading where we fail to offload (see the examples/nemo/scripts).
Thanks Andy, since the script is not yet in the master branch, I will put a reference here:
https://github.com/stfc/PSyclone/blob/7179f0e12469bfbd8799628444048e7237f24835/examples/nemo/scripts/omp_gpu_trans.py#L199-L213