f18-llvm-project icon indicating copy to clipboard operation
f18-llvm-project copied to clipboard

Implementation status of OpenMP Threadprivate directive

Open PeixinQiao opened this issue 4 years ago • 21 comments

https://github.com/flang-compiler/f18-llvm-project/pull/1135 supports non-character scalar, character, array, character array, pointer, allocatable, pointer, derived type, common block, non-SAVEd non-initialized non-character scalar in main program (this is special case).

It supports use association and argument association, but don't support host assocation currently.

PeixinQiao avatar Oct 16 '21 08:10 PeixinQiao

Test case 1: integer:

program test
  use omp_lib
  integer :: tid, y
  integer, save :: x

  !$omp threadprivate(x)

  x = -1
  y = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = 0,y = -1 changed: tid = 1,x = 11,y = 10 tid = 0,x = -1,y = 10 changed: tid = 0,x = 9,y = 20 middle: tid = 0,x = 9,y = 20 second loop: tid = 0,x = 9,y = 20 second loop changed: tid = 0,x = 19,y = 30 second loop: tid = 1,x = 11,y = 30 second loop changed: tid = 1,x = 22,y = 41

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Test case 2: real

program test
  use omp_lib
  integer :: tid
  real, save :: x, y

  !$omp threadprivate(x)

  x = -1
  y = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + 10
  y = y + tid + 10
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 0,x = -1.,y = -1. changed: tid = 0,x = 9.,y = 9. tid = 1,x = 0.,y = 9. changed: tid = 1,x = 11.,y = 20. middle: tid = 0,x = 9.,y = 20. second loop: tid = 0,x = 9.,y = 20. second loop changed: tid = 0,x = 19.,y = 30. second loop: tid = 1,x = 11.,y = 30. second loop changed: tid = 1,x = 22.,y = 41.

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Test case 3: complex

program test
  use omp_lib
  integer :: tid
  complex, save :: x, y

  !$omp threadprivate(x)

  x = cmplx(-1, -1)
  y = cmplx(-1, -1)

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + cmplx(10, 10)
  y = y + tid + cmplx(10, 10)
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  x = x + tid + cmplx(10, 10)
  y = y + tid + cmplx(10, 10)
  print *,"second loop changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = (0.,0.),y = (-1.,-1.) changed: tid = 1,x = (11.,10.),y = (10.,9.) tid = 0,x = (-1.,-1.),y = (10.,9.) changed: tid = 0,x = (9.,9.),y = (20.,19.) middle: tid = 0,x = (9.,9.),y = (20.,19.) second loop: tid = 0,x = (9.,9.),y = (20.,19.) second loop changed: tid = 0,x = (19.,19.),y = (30.,29.) second loop: tid = 1,x = (11.,10.),y = (30.,29.) second loop changed: tid = 1,x = (22.,20.),y = (41.,39.)

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Test case 4: logical

program test
  use omp_lib
  integer :: tid
  logical, save :: x, y

  !$omp threadprivate(x)

  x = .false.
  y = .false.

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y
  x = .true.
  y = .true.
  print *,"middle changed: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  print *,"final: tid = ", tid, ",x = ", x, ",y = ", y
end

Expected result:

tid = 0,x = F ,y = F tid = 1,x = F ,y = F middle: tid = 0,x = F ,y = F middle changed: tid = 0,x = T ,y = T second loop: tid = 0,x = T ,y = T second loop: tid = 1,x = F ,y = T final: tid = 0,x = T ,y = T

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Test case 5: array:

program test
  use omp_lib
  integer :: tid
  integer :: x(5), y(5)

  !$omp threadprivate(x)

  x = (/1,1,1,1,1/)
  y = (/1,1,1,1,1/)

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(1) = x(1) + 1
  x(2:3) = x(4:5) + 2
  x = x + 1
  print *,"changed: tid = ", tid, ",x = ", x(1), x(2), x(3), x(4), x(5), ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x(1), x(2), x(3), x(4), x(5), ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 0,x = 1 1 1 1 1,y = 1 1 1 1 1 tid = 1,x = 0 0 0 0 0,y = 1 1 1 1 1 changed: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 changed: tid = 1,x = 2 3 3 1 1,y = 1 1 1 1 1 middle: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 second loop: tid = 0,x = 3 4 4 2 2,y = 1 1 1 1 1 second loop: tid = 1,x = 2 3 3 1 1,y = 1 1 1 1 1

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Test case 6: character:

program test
  use omp_lib
  integer :: tid
  character(len=5) :: x, y

  !$omp threadprivate(x)

  x = "aaaaa"
  y = "xxxxx"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(2:3) = "bb"
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected result:

tid = 1,x = ,y = xxxxx changed: tid = 1,x = bb,y = xxxxx tid = 0,x = aaaaa,y = xxxxx changed: tid = 0,x = abbaa,y = xxxxx middle: tid = 0,x = abbaa,y = xxxxx second loop: tid = 0,x = abbaa,y = xxxxx second loop: tid = 1,x = bb,y = xxxxx

PeixinQiao avatar Oct 16 '21 09:10 PeixinQiao

Thanks for the detailed status and tracking the progress for threadprivate. I will create a similar one for data-sharing clauses as well. Support for commonblock is required for OpenMP 1.0 both in threadprivate as well as the data-sharing clauses. We should discuss this sometime this month/next month.

kiranchandramohan avatar Oct 19 '21 12:10 kiranchandramohan

@kiranchandramohan OK. Maybe we can discuss in Thursday's meeting this week.

PeixinQiao avatar Oct 19 '21 13:10 PeixinQiao

Test case 7: common block:

program test
  use omp_lib
  integer :: tid, a
  real :: b(2)
  complex :: c
  logical :: d
  character(5) :: e, f(2)
  common /blk/ a, b, c, d, e, f

  !$omp threadprivate(/blk/)

  a = 1
  b(2) = 1
  c = 1
  d = .true.
  e = "xx"
  f(2) = "xx"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ": ", a, b, c, d, e, f
  a = 2
  b(2) = 2
  c = 2
  d = .false.
  e = "yy"
  f(2) = "yy"
  print *,"changed: tid = ", tid, ": ", a, b, c, d, e, f
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ": ", a, b, c, d, e, f

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ": ", a, b, c, d, e, f
  !$omp end parallel
end

Expected results:

 tid =  0:  1 0. 1. (1.,0.) T xx   xx   
 changed: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 tid =  1:  0 0. 0. (0.,0.) F 
 changed: tid =  1:  2 0. 2. (2.,0.) F yy   yy   
 middle: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 second loop: tid =  0:  2 0. 2. (2.,0.) F yy   yy   
 second loop: tid =  1:  2 0. 2. (2.,0.) F yy   yy   

PeixinQiao avatar Oct 30 '21 08:10 PeixinQiao

Test case 8: derived type:

program test
  use omp_lib
  integer :: tid
  type my_type
    integer :: t_i
    integer :: t_arr(5)
  end type my_type
  type(my_type), save :: x, y

  !$omp threadprivate(x)

  x%t_i = -1
  y%t_i = -1

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x%t_i = 2
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected results:

 tid =  1,x =  0 0 0 0 0 0,y =  -1 0 0 0 0 0
 changed: tid =  1,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 tid =  0,x =  -1 0 0 0 0 0,y =  -1 0 0 0 0 0
 changed: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 middle: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 second loop: tid =  0,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0
 second loop: tid =  1,x =  2 0 0 0 0 0,y =  -1 0 0 0 0 0

PeixinQiao avatar Oct 30 '21 08:10 PeixinQiao

Test case 9: char array:

program test
  use omp_lib
  integer :: tid
  character(len=5) :: x(2), y(2)

  !$omp threadprivate(x)

  x(1) = "xxxxx"
  y(1) = "yyyyy"
  x(2) = "aaaaa"
  y(2) = "zzzzz"

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"tid = ", tid, ",x = ", x, ",y = ", y
  x(2) = "bb"
  !x(5) = "e"
  print *,"changed: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel

  tid = omp_get_thread_num()
  print *,"middle: tid = ", tid, ",x = ", x, ",y = ", y

  !$omp parallel private(tid) num_threads(2)
  tid = omp_get_thread_num()
  print *,"second loop: tid = ", tid, ",x = ", x, ",y = ", y
  !$omp end parallel
end

Expected results:

 tid =  1,x = ,y = yyyyyzzzzz
 changed: tid =  1,x = bb   ,y = yyyyyzzzzz
 tid =  0,x = xxxxxaaaaa,y = yyyyyzzzzz
 changed: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 middle: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 second loop: tid =  0,x = xxxxxbb   ,y = yyyyyzzzzz
 second loop: tid =  1,x = bb   ,y = yyyyyzzzzz

PeixinQiao avatar Oct 30 '21 08:10 PeixinQiao

Test case 10: allocatable && pointer:

program main
  use omp_lib
  integer, allocatable, save :: x, x2(:)
  integer, pointer, save :: y, y2(:)
  integer, target :: z, z2(2)
  integer :: tid

  !$omp threadprivate(x, x2, y, y2)

  allocate(x)
  allocate(x2(2))

  x = 2
  x2(1) = 2
  x2(2) = 2
  z = 2
  z2(1) = 2
  z2(2) = 2
  y=>z
  y2=>z2
  print *, x, x2, y, y2
  !$omp parallel num_threads(2) private(tid)
    tid = omp_get_thread_num()
    if (tid .gt. 0) then
      allocate(x)
      allocate(x2(2))
      x = 1
      x2(1) = 1
      x2(2) = 1
      z = 1
      z2(1) = 1
      z2(2) = 1
      y=>z
      y2=>z2
    endif

    print *, "tid = ", tid, ": ", x, x2, y, y2
  !$omp end parallel

  tid = omp_get_thread_num()
  print *, "tid = ", tid, ": ", x, x2, y, y2

  !$omp parallel num_threads(2) private(tid)
    tid = omp_get_thread_num()
    print *, "tid = ", tid, ": ", x, x2, y, y2
  !$omp end parallel
end

Expected results:

 2 2 2 2 2 2
 tid =  0:  2 2 2 2 2 2
 tid =  1:  1 1 1 1 1 1
 tid =  0:  2 2 2 1 1 1
 tid =  0:  2 2 2 1 1 1
 tid =  1:  1 1 1 1 1 1

PeixinQiao avatar Oct 30 '21 08:10 PeixinQiao

Test case 11: use assocation for non-common block

$ cat use-assoc.f90
module mod1
  use omp_lib
  integer :: x
  real :: y
  complex :: z
  logical :: l
  real, pointer :: a
  !$omp threadprivate(x,y,z,l,a)

contains
  subroutine sub()
    real, target :: b = 5.0
    !$omp parallel num_threads(2)
      if (omp_get_thread_num() == 1) then
        a=>b
      endif
      print *, x, y, z, l, a
    !$omp end parallel
  end
end
$ cat main.f90
program main
  use mod1
  real, target :: b = 4.0
  x = 1
  y = 2.0
  z = 3.0
  l = .true.
  a=>b
  call sub()
end

Expected results:

$ flang-new -fopenmp use-assoc.f90 -c
$ flang-new -fopenmp main.f90 -c
$ flang-new -fopenmp main.o use-assoc.o 
$ ./a.out 
 0 0. (0.,0.) F 5.
 1 2. (3.,0.) T 4.

PeixinQiao avatar Mar 17 '22 13:03 PeixinQiao

Test case 12: use assocation for common block

$ cat use-assoc2.f90
module mod1
  use omp_lib
  integer :: x
  real :: y
  complex :: z
  logical :: l
  real, pointer :: a
  common /blk/ x,y,z,l,a
  !$omp threadprivate(/blk/)

contains
  subroutine sub()
	real, target :: b = 5.0
	!$omp parallel num_threads(2)
	if (omp_get_thread_num() == 1) then
	  a=>b
	endif
	  print *, x, y, z, l, a
	!$omp end parallel
  end
end
$ cat main2.f90 
program main
  use mod1
  integer :: x1
  real :: y1
  complex :: z1
  logical :: l1
  real, pointer :: a1
  common /blk/ x1,y1,z1,l1,a1
  !$omp threadprivate(/blk/)
  real, target :: b = 4.0
  x1 = 1
  y1 = 2.0
  z1 = 3.0
  l1 = .true.
  a1=>b
  call sub()
end

Expected results:

$ flang-new -fopenmp use-assoc2.f90 -c
$ flang-new -fopenmp main2.f90 -c
$ flang-new -fopenmp main2.o use-assoc2.o
$ ./a.out 
 0 0. (0.,0.) F 5.
 1 2. (3.,0.) T 4.

PeixinQiao avatar Mar 17 '22 13:03 PeixinQiao

Test case 13: host assocation

$ cat hostassoc1.f90 
program main
  use omp_lib
  integer, save :: a
  !$omp threadprivate(a)
  call sub()
contains
  subroutine sub()
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a, omp_get_thread_num()
    !$omp end parallel
  end
end
$ cat hostassoc2.f90 
program main
  use omp_lib
  integer :: a
  !$omp threadprivate(a)
  call sub()
contains
  subroutine sub()
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a, omp_get_thread_num()
    !$omp end parallel
  end
end

Expected results:

           0           0
           2           2
           3           3
           1           1
           2           2
           3           3
           1           1
           0           0

gfortran 12 and ifort 2021 support hostassoc1.f90, but do not support hostassoc2.f90. For hostassoc2.f90, gfortran 12 gives wrong running results, while ifort reports the semantic error.

PeixinQiao avatar Mar 17 '22 13:03 PeixinQiao

Test case 14: common block in threadprivate and used in multiple program units (revised from test case in classic flang)

program mian
  use omp_lib
  integer result(4)
  common/result/result
  call sub0
  print *, result
end
subroutine sub0
  use omp_lib
  common /com/ ic1, ic2
  !$omp   threadprivate ( /com/ )
  ic1 = 2
  ic2 = 4
  call sub1
  call sub2
end
subroutine sub1
  use omp_lib
integer :: tid
common /com/ ic1, ic2
!$omp   threadprivate ( /com/ )
!$omp   parallel num_threads(2)
tid = omp_get_thread_num()
ic1 = ic1 + tid
ic2 = ic2 + tid
!$omp end parallel
end
subroutine sub2
  use omp_lib
integer :: tid
integer result(4)
common/result/result
common /com/ ic1, ic2
!$omp   threadprivate ( /com/ )
!$omp   parallel num_threads(2)
tid = omp_get_thread_num()
result(1+tid) = ic1
result(3+tid) = ic2
!$omp   endparallel
end

Expected results:

 2 1 4 1

PeixinQiao avatar Mar 17 '22 13:03 PeixinQiao

Test case 15: argument assocation

program main
  use omp_lib
  integer, save :: a
  !$omp threadprivate(a)
  call sub(a)
contains
  subroutine sub(a)
    integer :: a
    a = 2
    !$omp parallel num_threads(4)
      a = omp_get_thread_num()
      print *, a, omp_get_thread_num()
    !$omp end parallel
    !$omp parallel num_threads(4)
      print *, a
    !$omp end parallel
  end
end

Expected results:

 3 3
 3 0
 3 1
 3 2
 3
 3
 3
 3

PeixinQiao avatar Mar 17 '22 13:03 PeixinQiao

@PeixinQiao Can we migrate this to llvm-project issues? Let me know if you need help.

kiranchandramohan avatar Feb 09 '23 21:02 kiranchandramohan

@kiranchandramohan Should I remove all the test cases (the whole issue) or unsupported test cases?

PeixinQiao avatar Feb 11 '23 03:02 PeixinQiao

I think you can file an isssue (or issues) for unsupported cases in llvm-project. You can add a link to this issue from the new issue or from the excel sheet where we track OpenMP issues.

kiranchandramohan avatar Feb 14 '23 11:02 kiranchandramohan

I think you can file an isssue (or issues) for unsupported cases in llvm-project. You can add a link to this issue from the new issue or from the excel sheet where we track OpenMP issues.

OK. I will do it.

PeixinQiao avatar Feb 14 '23 14:02 PeixinQiao