!Tensor Algebra for Multi- and Many-core CPUs (OpenMP based).
!AUTHOR: Dmitry I. Lyakh (Liakh): [email protected]
!REVISION: 2022/01/27
!Copyright (C) 2013-2022 Dmitry I. Lyakh (Liakh)
!Copyright (C) 2014-2022 Oak Ridge National Laboratory (UT-Battelle)
!LICENSE: BSD 3-Clause
!GNU linking: -lblas -lgfortran -lgomp
!ACRONYMS:
! - mlndx - multi-index;
! - Lm - Level-min segment size (the lowest level segment size for bricked storage);
! - dlf - dimension-led storage of tensor blocks where the 1st dimension is the most minor (Fortran-like) (DEFAULT):
! Numeration within each dimension starts from 0: [0..extent-1] (see the addressing example after this list).
! - dlc - dimension-led storage of tensor blocks where the 1st dimension is the most senior (C-like);
! Numeration within each dimension starts from 0: [0..extent-1].
! - brf - bricked storage of tensor blocks where the 1st dimension is the most minor (Fortran-like) (DEFAULT);
! - brc - bricked storage of tensor blocks where the 1st dimension is the most senior (C-like);
! - r4 - real(4);
! - r8 - real(8);
! - c4 - complex(4);
! - c8 - complex(8);
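! - EXAMPLE (dlf addressing, a sketch implied by the definitions above): for a dlf-stored block
!   with extents (e1,e2,e3), element (i1,i2,i3) (0-based indices) resides at linear offset
!   i1 + e1*(i2 + e2*i3), i.e. the 1st dimension runs fastest (Fortran column-major order).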
!PREPROCESSOR:
! -D NO_OMP: Do not use OpenMP (serial);
! -D NO_BLAS: Replace BLAS calls with in-house routines (slower);
! -D USE_MKL: Use Intel MKL library interface for BLAS;
! -D NO_PHI: Ignore Intel MIC (Xeon Phi);
module tensor_algebra_cpu
! use, intrinsic:: ISO_C_BINDING
! use dil_basic
use tensor_algebra !includes ISO_C_BINDING, dil_basic
use stsubs
use combinatoric
use timers
use symm_index
#ifdef USE_MKL
!use blas95
!use lapack95
!use f95_precision
!use mkl_service
#endif
#ifndef NO_OMP
use omp_lib
implicit none
public
#else
implicit none
public
integer, external:: omp_get_max_threads
external:: omp_set_num_threads !omp_set_num_threads is a subroutine, not a function
#endif
!PARAMETERS:
!Default output for the module procedures:
integer, private:: CONS_OUT=6 !default output device for this module (also used for INTEL MIC TAL)
logical, private:: VERBOSE=.TRUE. !verbosity (also used for INTEL MIC TAL)
integer, private:: DEBUG=0 !debugging mode
integer, private:: LOGGING=0 !logging mode
logical, private:: CHECK_NAN=.FALSE. !NaN checking mode (check all tensor blocks for NaN)
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: CONS_OUT,VERBOSE
!DIR$ ATTRIBUTES ALIGN:128:: CONS_OUT,VERBOSE
#endif
!Global:
integer, parameter, public:: MAX_SHAPE_STR_LEN=1024 !max allowed length for a tensor shape specification string (TSSS)
integer, parameter, public:: LONGINT=INTL !long integer size in bytes
integer, parameter, public:: CPTAL_MAX_THREADS=1024 !max allowed number of threads in this module
integer, private:: MEM_ALLOC_POLICY=MEM_ALLOC_TMP_BUF !memory allocation policy
logical, private:: MEM_ALLOC_FALLBACK=.TRUE. !memory allocation fallback to the regular allocator
logical, private:: ZERO_UNINITIALIZED_OUTPUT=.TRUE. !initialize uninitialized output tensors to zero in tensor contractions
logical, private:: DATA_KIND_SYNC=.FALSE. !if .TRUE., each tensor operation will synchronize all existing data kinds
logical, private:: TRANS_SHMEM=.TRUE. !cache-efficient (true) VS scatter (false) tensor transpose algorithm
#ifndef NO_BLAS
logical, private:: DISABLE_BLAS=.FALSE. !if .TRUE. and BLAS is accessible, BLAS calls will be replaced by my own routines
#else
logical, private:: DISABLE_BLAS=.TRUE. !if .TRUE. and BLAS is accessible, BLAS calls will be replaced by my own routines
#endif
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: MAX_SHAPE_STR_LEN,LONGINT,CPTAL_MAX_THREADS,MEM_ALLOC_POLICY,MEM_ALLOC_FALLBACK
!DIR$ ATTRIBUTES OFFLOAD:mic:: DATA_KIND_SYNC,TRANS_SHMEM,DISABLE_BLAS
!DIR$ ATTRIBUTES ALIGN:128:: MAX_SHAPE_STR_LEN,LONGINT,CPTAL_MAX_THREADS,MEM_ALLOC_POLICY,MEM_ALLOC_FALLBACK
!DIR$ ATTRIBUTES ALIGN:128:: DATA_KIND_SYNC,TRANS_SHMEM,DISABLE_BLAS
#endif
!Numerical:
real(8), parameter, private:: ABS_CMP_THRESH=1d-13 !default absolute error threshold for numerical comparisons
real(8), parameter, private:: REL_CMP_THRESH=1d-2 !default relative error threshold for numerical comparisons
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: ABS_CMP_THRESH,REL_CMP_THRESH
!DIR$ ATTRIBUTES ALIGN:128:: ABS_CMP_THRESH,REL_CMP_THRESH
#endif
!DERIVED DATA TYPES:
!Tensor shape (storage layout specification for a tensor block):
type, public:: tensor_shape_t
integer:: num_dim=-1 !total number of dimensions (num_dim=0 defines a scalar tensor).
integer, pointer:: dim_extent(:)=>NULL() !extent of each dimension (if num_dim>0): range=[0..extent-1].
integer, pointer:: dim_divider(:)=>NULL() !divider for each dimension, i.e. the <Lm_segment_size> (ordered dimensions must have the same divider!): %dim_divider(1)=0 means that an alternative (neither dimension-led nor bricked) storage layout is used.
integer, pointer:: dim_group(:)=>NULL() !dimension grouping (default group 0 means no symmetry restrictions): if %dim_divider(1)=0, then %dim_group(1) regulates the alternative storage layout kind.
end type tensor_shape_t
!Tensor block:
type, public:: tensor_block_t
integer(LONGINT):: tensor_block_size=0_LONGINT !total number of elements in the tensor block (informal, set after creation)
integer:: ptr_alloc=0 !pointer allocation status (bits set when pointers are getting allocated, not just associated)
type(tensor_shape_t):: tensor_shape !shape of the tensor block (see above)
complex(8):: scalar_value=(0d0,0d0) !scalar value if the rank is zero, otherwise can be used for storing the norm of the tensor block
real(4), pointer, contiguous:: data_real4(:)=>NULL() !tensor block data (float)
real(8), pointer, contiguous:: data_real8(:)=>NULL() !tensor block data (double)
complex(4), pointer, contiguous:: data_cmplx4(:)=>NULL() !tensor block data (float complex)
complex(8), pointer, contiguous:: data_cmplx8(:)=>NULL() !tensor block data (double complex)
end type tensor_block_t
!GLOBAL DATA:
real(8), private:: cpu_flops=0d0 !total CPU executed flops
real(8), private:: cpu_flop_time=0d0 !time spent executing CPU flops
real(8), private:: cpu_permute_bytes=0d0 !total CPU permuted data size
real(8), private:: cpu_permute_time=0d0 !time spent permuting data on CPU
real(8), private:: cpu_contract_time=0d0 !total time spent in tensor contractions on CPU
!GENERIC INTERFACES:
interface tensor_block_shape_create
module procedure tensor_block_shape_create_sym
module procedure tensor_block_shape_create_num
end interface tensor_block_shape_create
interface array_alloc
module procedure array_alloc_r4
module procedure array_alloc_r8
module procedure array_alloc_c4
module procedure array_alloc_c8
end interface array_alloc
interface array_free
module procedure array_free_r4
module procedure array_free_r8
module procedure array_free_c4
module procedure array_free_c8
end interface array_free
interface tensor_block_slice_dlf
module procedure tensor_block_slice_dlf_r4
module procedure tensor_block_slice_dlf_r8
module procedure tensor_block_slice_dlf_c4
module procedure tensor_block_slice_dlf_c8
end interface tensor_block_slice_dlf
interface tensor_block_insert_dlf
module procedure tensor_block_insert_dlf_r4
module procedure tensor_block_insert_dlf_r8
module procedure tensor_block_insert_dlf_c4
module procedure tensor_block_insert_dlf_c8
end interface tensor_block_insert_dlf
interface tensor_block_copy_dlf
module procedure tensor_block_copy_dlf_r4
module procedure tensor_block_copy_dlf_r8
module procedure tensor_block_copy_dlf_c4
module procedure tensor_block_copy_dlf_c8
end interface tensor_block_copy_dlf
interface tensor_block_copy_scatter_dlf
module procedure tensor_block_copy_scatter_dlf_r4
module procedure tensor_block_copy_scatter_dlf_r8
module procedure tensor_block_copy_scatter_dlf_c4
module procedure tensor_block_copy_scatter_dlf_c8
end interface tensor_block_copy_scatter_dlf
interface tensor_block_fcontract_dlf
module procedure tensor_block_fcontract_dlf_r4
module procedure tensor_block_fcontract_dlf_r8
module procedure tensor_block_fcontract_dlf_c4
module procedure tensor_block_fcontract_dlf_c8
end interface tensor_block_fcontract_dlf
interface tensor_block_pcontract_dlf
module procedure tensor_block_pcontract_dlf_r4
module procedure tensor_block_pcontract_dlf_r8
module procedure tensor_block_pcontract_dlf_c4
module procedure tensor_block_pcontract_dlf_c8
end interface tensor_block_pcontract_dlf
interface tensor_block_ftrace_dlf
module procedure tensor_block_ftrace_dlf_r4
module procedure tensor_block_ftrace_dlf_r8
module procedure tensor_block_ftrace_dlf_c4
module procedure tensor_block_ftrace_dlf_c8
end interface tensor_block_ftrace_dlf
interface tensor_block_ptrace_dlf
module procedure tensor_block_ptrace_dlf_r4
module procedure tensor_block_ptrace_dlf_r8
module procedure tensor_block_ptrace_dlf_c4
module procedure tensor_block_ptrace_dlf_c8
end interface tensor_block_ptrace_dlf
#ifndef NO_BLAS
interface tensor_block_pcontract_batch_dlf
module procedure tensor_block_pcontract_batch_dlf_r4
module procedure tensor_block_pcontract_batch_dlf_r8
module procedure tensor_block_pcontract_batch_dlf_c4
module procedure tensor_block_pcontract_batch_dlf_c8
end interface tensor_block_pcontract_batch_dlf
#endif
!FUNCTION VISIBILITY:
public get_mem_alloc_policy !gets the current memory allocation policy for sizeable arrays
public set_mem_alloc_policy !sets the memory allocation policy for sizeable arrays
public set_data_kind_sync !turns on/off data kind synchronization (0/1)
public set_transpose_algorithm !switches between scatter (0) and shared-memory (1) tensor transpose algorithms
public set_matmult_algorithm !switches between BLAS GEMM (0) and my OpenMP matmult kernels (1)
public cptal_print_stats !prints the tensor operation execution statistics on Host CPU
public cmplx4_to_real4 !returns the real approximant of a complex number (algorithm by D.I.L.)
public cmplx8_to_real8 !returns the real approximant of a complex number (algorithm by D.I.L.)
public tensor_shape_assoc !constructs a tensor shape object by pointer associating with external data
public tensor_block_layout !returns the type of the storage layout for a given tensor block
public tensor_block_shape_size !determines the tensor block size induced by its shape
public tensor_master_data_kind !determines the master data kind present in a tensor block
public tensor_common_data_kind !determines the common data kind present in two compatible tensor blocks
public tensor_block_compatible !determines whether two tensor blocks are compatible (under an optional index permutation)
public tensor_block_mimic !mimics the internal structure of a tensor block without copying the actual data
public tensor_block_create !creates a tensor block based on the tensor shape specification string (TSSS)
public tensor_block_init !initializes a tensor block with either a predefined value or random numbers
public tensor_block_is_empty !returns TRUE if the tensor block is empty, FALSE otherwise
public tensor_block_assoc !associates an empty <tensor_block_t> object with externally provided data
public tensor_block_destroy !destroys a tensor block
public tensor_block_sync !allocates and/or synchronizes different data kinds in a tensor block
public tensor_block_scale !multiplies all elements of a tensor block by some factor
public tensor_block_conjg !complex conjugates all elements of a tensor block
public tensor_block_norm1 !determines the 1-norm of a tensor block (the sum of moduli of all elements)
public tensor_block_norm2 !determines the squared Euclidean (Frobenius) 2-norm of a tensor block
public tensor_block_max !determines the maximal (by modulus) tensor block element
public tensor_block_min !determines the minimal (by modulus) tensor block element
public tensor_block_slice !extracts a slice from a tensor block
public tensor_block_insert !inserts a slice into a tensor block
public tensor_block_print !prints a tensor block
public tensor_block_trace !intra-tensor index contraction (accumulative trace)
public tensor_block_cmp !compares two tensor blocks
public tensor_block_copy !makes a copy of a tensor block (with an optional index permutation)
public tensor_block_add !adds one tensor block to another
public tensor_block_contract !inter-tensor index contraction (accumulative contraction)
public tensor_block_decompose_svd !decomposes a given tensor block using a full or partial SVD
public tensor_block_scalar_value !returns the scalar value component of <tensor_block_t>
public tensor_block_has_nan !returns TRUE if the tensor block has a NaN element
public get_mlndx_addr !generates an array of addressing increments for the linearization map for symmetric multi-indices
public mlndx_value !returns the address associated with a (symmetric) multi-index, based on the array generated by <get_mlndx_addr>
public tensor_shape_rnd !returns a random tensor-shape-specification-string (TSSS)
public tensor_shape_rank !returns the number of dimensions in a tensor-shape-specification-string (TSSS)
public tensor_shape_str_create !creates a tensor shape specification string
public get_contr_pattern_dig !converts a symbolic tensor contraction pattern into the digital form (used by tensor_block_contract)
public get_contr_pattern_sym !converts a digital tensor contraction pattern into a symbolic form
public get_contr_permutations !given a digital contraction pattern, returns all tensor permutations necessary for the subsequent matrix multiplication (no hypercontractions)
public get_contraction_permutations !given a digital contraction pattern, returns all tensor permutations necessary for the subsequent matrix multiplication (supports hypercontractions)
public contr_pattern_rnd !returns a random digital tensor contraction pattern
public coherence_control_var !returns a coherence control variable based on a mnemonic input
public tensor_block_shape_create !generates the tensor shape based on either the tensor shape specification string (TSSS) or numeric arguments
public tensor_block_shape_create_sym !generates the tensor shape based on the tensor shape specification string (TSSS)
public tensor_block_shape_create_num !generates the tensor shape based on the numeric arguments
public tensor_block_shape_ok !checks the correctness of a tensor shape generated from a tensor shape specification string (TSSS)
public tensor_block_alloc !sets/queries the allocation status of data pointers in a tensor block
private array_alloc !allocates an array pointer {R4,R8,C4,C8}
private array_alloc_r4 !allocates an array pointer R4
private array_alloc_r8 !allocates an array pointer R8
private array_alloc_c4 !allocates an array pointer C4
private array_alloc_c8 !allocates an array pointer C8
private array_free !frees an array pointer {R4,R8,C4,C8}
private array_free_r4 !frees an array pointer R4
private array_free_r8 !frees an array pointer R8
private array_free_c4 !frees an array pointer C4
private array_free_c8 !frees an array pointer C8
public tensor_block_slice_dlf !extracts a slice from a tensor block (Fortran-like dimension-led storage layout)
public tensor_block_insert_dlf !inserts a slice into a tensor block (Fortran-like dimension-led storage layout)
public tensor_block_copy_dlf !tensor transpose for dimension-led (Fortran-like-stored) dense tensor blocks
public tensor_block_copy_scatter_dlf !tensor transpose for dimension-led (Fortran-like-stored) dense tensor blocks (scattering variant)
public tensor_block_fcontract_dlf !multiplies two matrices derived from tensors to produce a scalar (left is transposed, right is normal)
public tensor_block_pcontract_dlf !multiplies two matrices derived from tensors to produce a third matrix (left is transposed, right is normal)
public tensor_block_ftrace_dlf !takes a full trace of a tensor block
public tensor_block_ptrace_dlf !takes a partial trace of a tensor block
#ifndef NO_BLAS
public tensor_block_pcontract_batch_dlf !batched version of tensor_block_pcontract_dlf
#endif
contains
!-----------------
!PUBLIC FUNCTIONS:
!----------------------------------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: get_mem_alloc_policy
#endif
function get_mem_alloc_policy(ierr,fallback) result(mem_policy) !SERIAL
!Gets the current memory allocation policy.
implicit none
integer:: mem_policy !out: memory allocation policy
integer, intent(out), optional:: ierr !out: error code
logical, intent(out), optional:: fallback !out: fallback status
mem_policy=MEM_ALLOC_POLICY
if(present(fallback)) fallback=MEM_ALLOC_FALLBACK
if(present(ierr)) ierr=0
return
end function get_mem_alloc_policy
!----------------------------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: set_mem_alloc_policy
#endif
subroutine set_mem_alloc_policy(mem_policy,ierr,fallback) !SERIAL
!Sets the memory allocation policy.
implicit none
integer, intent(in):: mem_policy !in: memory allocation policy
integer, intent(out), optional:: ierr !out: error code
logical, intent(in), optional:: fallback !in: memory allocation fallback to regular
select case(mem_policy)
case(MEM_ALLOC_REGULAR,MEM_ALLOC_TMP_BUF,MEM_ALLOC_ALL_BUF)
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
MEM_ALLOC_POLICY=mem_policy
case default
if(present(ierr)) ierr=1 !invalid policy
return
end select
if(present(fallback)) then
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
MEM_ALLOC_FALLBACK=fallback
endif
if(present(ierr)) ierr=0
return
end subroutine set_mem_alloc_policy
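!EXAMPLE (a minimal usage sketch, assuming the MEM_ALLOC_REGULAR constant imported by this module):
! call set_mem_alloc_policy(MEM_ALLOC_REGULAR,ierr,fallback=.TRUE.) !regular allocation with fallback enabled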
!-----------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: set_data_kind_sync
#endif
subroutine set_data_kind_sync(alg) !SERIAL
implicit none
integer, intent(in):: alg
if(alg.eq.0) then
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
DATA_KIND_SYNC=.FALSE.
else
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
DATA_KIND_SYNC=.TRUE.
endif
return
end subroutine set_data_kind_sync
!----------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: set_transpose_algorithm
#endif
subroutine set_transpose_algorithm(alg) !SERIAL
implicit none
integer, intent(in):: alg
if(alg.eq.0) then
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
TRANS_SHMEM=.FALSE.
else
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
TRANS_SHMEM=.TRUE.
endif
return
end subroutine set_transpose_algorithm
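!EXAMPLE: call set_transpose_algorithm(1) selects the cache-efficient shared-memory transpose
!(the default, TRANS_SHMEM=.TRUE.); call set_transpose_algorithm(0) selects the scatter variant.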
!--------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: set_matmult_algorithm
#endif
subroutine set_matmult_algorithm(alg) !SERIAL
implicit none
integer, intent(in):: alg
#ifndef NO_BLAS
if(alg.eq.0) then
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
DISABLE_BLAS=.FALSE.
else
!!!$OMP ATOMIC WRITE SEQ_CST
!$OMP ATOMIC WRITE
DISABLE_BLAS=.TRUE.
endif
#endif
return
end subroutine set_matmult_algorithm
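!EXAMPLE: call set_matmult_algorithm(0) keeps BLAS GEMM (when compiled with BLAS);
!call set_matmult_algorithm(1) forces the in-house OpenMP matmult kernels (DISABLE_BLAS=.TRUE.).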
!-------------------------------------------
subroutine cptal_print_stats()
implicit none
write(CONS_OUT,'("#MSG(TAL-SH::CP-TAL): Statistics on CPU:")')
write(CONS_OUT,'(1x,"Number of Flops processed : ",D25.14)') cpu_flops
if(cpu_flop_time.gt.0d0) then
write(CONS_OUT,'(1x,"Average GEMM GFlop/s rate : ",D25.14)') cpu_flops/(cpu_flop_time*1d9)
else
write(CONS_OUT,'(1x,"Average GEMM GFlop/s rate : ",D25.14)') 0d0
endif
write(CONS_OUT,'(1x,"Number of Bytes permuted : ",D25.14)') cpu_permute_bytes
if(cpu_permute_time.gt.0d0) then
write(CONS_OUT,'(1x,"Average permute GB/s rate : ",D25.14)') cpu_permute_bytes/(cpu_permute_time*1024d0*1024d0*1024d0)
else
write(CONS_OUT,'(1x,"Average permute GB/s rate : ",D25.14)') 0d0
endif
if(cpu_contract_time.gt.0d0) then
write(CONS_OUT,'(1x,"Average contract GFlop/s rate: ",D25.14)') cpu_flops/(cpu_contract_time*1d9)
else
write(CONS_OUT,'(1x,"Average contract GFlop/s rate: ",D25.14)') 0d0
endif
write(CONS_OUT,'("#END_MSG")')
return
end subroutine cptal_print_stats
!---------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: cmplx4_to_real4
#endif
real(4) function cmplx4_to_real4(cmplx_num) !SERIAL
!This function returns a real approximant for a complex number with the following properties:
! 1) The Euclidean (Frobenius) norm (modulus) is preserved;
! 2) The sign inversion symmetry is preserved.
implicit none
complex(4), intent(in):: cmplx_num
real(4):: real_part
real_part=real(cmplx_num)
if(real_part.ne.0.0) then
cmplx4_to_real4=abs(cmplx_num)*sign(1.0,real_part)
else
cmplx4_to_real4=aimag(cmplx_num)
endif
return
end function cmplx4_to_real4
!---------------------------------------------
#ifndef NO_PHI
!DIR$ ATTRIBUTES OFFLOAD:mic:: cmplx8_to_real8
#endif
real(8) function cmplx8_to_real8(cmplx_num) !SERIAL
!This function returns a real approximant for a complex number with the following properties:
! 1) The Euclidean (Frobenius) norm (modulus) is preserved;
! 2) The sign inversion symmetry is preserved.
implicit none
complex(8), intent(in):: cmplx_num
real(8):: real_part
real_part=real(cmplx_num,8)
if(real_part.ne.0d0) then
cmplx8_to_real8=abs(cmplx_num)*sign(1d0,real_part)
else
cmplx8_to_real8=aimag(cmplx_num)
endif
return
end function cmplx8_to_real8
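!EXAMPLE (worked values): cmplx8_to_real8((3d0,4d0))=+5d0 and cmplx8_to_real8((-3d0,4d0))=-5d0
!(modulus 5 carrying the sign of the real part); cmplx8_to_real8((0d0,4d0))=+4d0 (pure imaginary input).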
!--------------------------------------------------------------------
subroutine tensor_shape_assoc(tens_shape,ierr,dims,divs,grps)
!Constructs a tensor shape object <tensor_shape_t> by pointer associating it
!with external data. The incoming <tens_shape> must be empty on entry.
implicit none
type(tensor_shape_t), intent(inout):: tens_shape !inout: tensor shape
integer, intent(out):: ierr !out: error code (0:success)
integer, intent(in), pointer, contiguous, optional:: dims(:) !in: dimension extents (length = tensor rank)
integer, intent(in), pointer, contiguous, optional:: divs(:) !in: dimension dividers
integer, intent(in), pointer, contiguous, optional:: grps(:) !in: dimension groups
integer:: n
ierr=0
if(tens_shape%num_dim.lt.0.and.&
&(.not.(associated(tens_shape%dim_extent).or.&
&associated(tens_shape%dim_divider).or.&
&associated(tens_shape%dim_group)))) then
if(present(dims)) then
if(associated(dims)) then
n=size(dims)
if(n.ge.0.and.n.le.MAX_TENSOR_RANK) then
tens_shape%num_dim=n
if(n.gt.0) then
tens_shape%dim_extent(1:n)=>dims(1:n)
if(present(divs)) then
if(associated(divs)) then
if(size(divs).eq.n) then
tens_shape%dim_divider(1:n)=>divs(1:n)
else
ierr=1; return
endif
endif
endif
if(present(grps)) then
if(associated(grps)) then
if(size(grps).eq.n) then
tens_shape%dim_group(1:n)=>grps(1:n)
else
ierr=2; return
endif
endif
endif
endif
else
ierr=3
endif
else
tens_shape%num_dim=0
endif
else
tens_shape%num_dim=0
endif
else
ierr=4 !tensor shape is not empty
endif
return
end subroutine tensor_shape_assoc
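!EXAMPLE (a hypothetical usage sketch; <my_shape> and <my_dims> are illustrative names):
! integer, pointer, contiguous:: my_dims(:)
! type(tensor_shape_t):: my_shape
! allocate(my_dims(3)); my_dims=(/10,20,30/)
! call tensor_shape_assoc(my_shape,ierr,dims=my_dims) !my_shape%dim_extent now points to my_dims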
!------------------------------------------------------------------
integer function tensor_block_layout(tens,ierr,check_shape) !SERIAL
!Returns the type of the storage layout for a given tensor block <tens>.
!INPUT:
! - tens - tensor block;
! - check_shape - (optional) if .TRUE., the tensor shape will be checked;
!OUTPUT:
! - tensor_block_layout - tensor block storage layout;
! - ierr - error code (0:success).
!NOTES:
! - %dim_divider(1)=0 means that an alternative (neither dimension-led nor bricked) storage layout is used,
! whose kind is regulated by the %dim_group(1) then.
! - For dimension_led, bricked_dense and bricked_ordered tensor blocks,
! index group 0 does not imply any index ordering restrictions.
! The presence of group 1,2,etc...(up to the tensor rank) assumes
! that the bricked_ordered storage layout is in use. It is illegal
! to assign non-zero group numbers to indices for dimension_led
! and bricked_dense tensor blocks.
implicit none
type(tensor_block_t), intent(inout):: tens !(out) because of <tensor_block_shape_ok>
logical, intent(in), optional:: check_shape
integer, intent(inout):: ierr
integer i,j,k,l,m,n,ibus(0:max_tensor_rank)
ierr=0; tensor_block_layout=not_allocated
if(present(check_shape)) then
if(check_shape) then; ierr=tensor_block_shape_ok(tens); if(ierr.ne.0) return; endif
endif
if(tens%tensor_shape%num_dim.gt.0.and.associated(tens%tensor_shape%dim_extent).and.&
&associated(tens%tensor_shape%dim_divider).and.associated(tens%tensor_shape%dim_group)) then !true tensor
if(tens%tensor_shape%dim_divider(1).gt.0) then !dimension-led or bricked
tensor_block_layout=dimension_led
do i=1,tens%tensor_shape%num_dim
if(tens%tensor_shape%dim_extent(i).ne.tens%tensor_shape%dim_divider(i)) then
tensor_block_layout=bricked_dense; exit
endif
enddo
if(tensor_block_layout.eq.bricked_dense) then
ibus(0:tens%tensor_shape%num_dim)=0
do i=1,tens%tensor_shape%num_dim
j=tens%tensor_shape%dim_group(i); if(j.lt.0.or.j.gt.tens%tensor_shape%num_dim) then; ierr=1000; return; endif
if(j.gt.0.and.ibus(j).gt.0) then; tensor_block_layout=bricked_ordered; exit; endif
ibus(j)=ibus(j)+1
enddo
endif
else !alternative storage layout
!`Future
endif
elseif(tens%tensor_shape%num_dim.eq.0) then !scalar tensor
tensor_block_layout=scalar_tensor
endif
return
end function tensor_block_layout
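!EXAMPLE: a block with extents (8,8) and dividers (8,8) is classified as <dimension_led>;
!with dividers (4,4) it becomes <bricked_dense>; a repeated non-zero dimension group,
!e.g. groups (1,1), further promotes it to <bricked_ordered>.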
!-------------------------------------------------------------------------
integer(LONGINT) function tensor_block_shape_size(tens_block,ierr) !SERIAL
!This function determines the size of a tensor block (number of elements) by its shape.
!Note that a scalar (0-dimensional tensor) and a 1-dimensional tensor with extent 1 are not the same!
implicit none
type(tensor_block_t), intent(inout):: tens_block !(out) because of <tensor_block_layout> because of <tensor_block_shape_ok>
integer, intent(inout):: ierr
integer i,j,k,l,m,n,k0,k1,k2,k3,ks,kf,tst
ierr=0; tensor_block_shape_size=0_LONGINT
tst=tensor_block_layout(tens_block,ierr); if(ierr.ne.0) return
select case(tst)
case(not_allocated)
tensor_block_shape_size=0_LONGINT; ierr=-1
case(scalar_tensor)
tensor_block_shape_size=1_LONGINT
case(dimension_led,bricked_dense)
tensor_block_shape_size=1_LONGINT
do i=1,tens_block%tensor_shape%num_dim
if(tens_block%tensor_shape%dim_extent(i).gt.0.and.&
&tens_block%tensor_shape%dim_divider(i).gt.0.and.&
&tens_block%tensor_shape%dim_divider(i).le.tens_block%tensor_shape%dim_extent(i)) then
tensor_block_shape_size=tensor_block_shape_size*int(tens_block%tensor_shape%dim_extent(i),LONGINT)
else
ierr=100+i; return !invalid dimension specifier in tens_block%tensor_shape%
endif
enddo
case(bricked_ordered)
!`Future: Compute volumes of all ordered multi-indices and multiply them.
case(sparse_list)
!`Future
case(compressed)
!`Future
case default
ierr=-2
end select
return
end function tensor_block_shape_size
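!EXAMPLE (worked arithmetic): for a dense dimension-led block with extents (20,20,40),
!tensor_block_shape_size = 20*20*40 = 16000 elements; a scalar tensor (num_dim=0) has size 1.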
!---------------------------------------------------------------
character(2) function tensor_master_data_kind(tens,ierr) !SERIAL
!This function determines the master data kind present in a tensor block.
!INPUT:
! - tens - tensor block;
!OUTPUT:
! - tensor_master_data_kind - one of {'r4','r8','c4','c8','--'}, where the latter means "not allocated";
! - ierr - error code (0:success).
implicit none
type(tensor_block_t), intent(in):: tens
integer, intent(inout):: ierr
ierr=0; tensor_master_data_kind='--'
if(tens%tensor_shape%num_dim.eq.0) then
tensor_master_data_kind='c8'
elseif(tens%tensor_shape%num_dim.gt.0) then
if(associated(tens%data_real4)) tensor_master_data_kind='r4'
if(associated(tens%data_real8)) tensor_master_data_kind='r8'
if(associated(tens%data_cmplx4)) tensor_master_data_kind='c4'
if(associated(tens%data_cmplx8)) tensor_master_data_kind='c8'
endif
return
end function tensor_master_data_kind
!----------------------------------------------------------------------
character(2) function tensor_common_data_kind(tens1,tens2,ierr) !SERIAL
!This function determines the common data kind present in two compatible tensor blocks.
!INPUT:
! - tens1, tens2 - compatible tensor blocks;
!OUTPUT:
! - tensor_common_data_kind - one of {'r4','r8','c4','c8','--'}, where the latter means "not applicable";
! - ierr - error code (0:success).
implicit none
type(tensor_block_t), intent(in):: tens1,tens2
integer, intent(inout):: ierr
ierr=0; tensor_common_data_kind='--'
if(tens1%tensor_shape%num_dim.eq.0.and.tens2%tensor_shape%num_dim.eq.0) then
tensor_common_data_kind='c8'
elseif(tens1%tensor_shape%num_dim.gt.0.and.tens2%tensor_shape%num_dim.gt.0) then
if(associated(tens1%data_real4).and.associated(tens2%data_real4)) tensor_common_data_kind='r4'
if(associated(tens1%data_real8).and.associated(tens2%data_real8)) tensor_common_data_kind='r8'
if(associated(tens1%data_cmplx4).and.associated(tens2%data_cmplx4)) tensor_common_data_kind='c4'
if(associated(tens1%data_cmplx8).and.associated(tens2%data_cmplx8)) tensor_common_data_kind='c8'
endif
return
end function tensor_common_data_kind
!-------------------------------------------------------------------------------------------------
logical function tensor_block_compatible(tens_in,tens_out,ierr,transp,no_check_data_kinds) !SERIAL
!This function decides whether two tensor blocks are compatible
!under some index permutation (the latter is optional).
!INPUT:
! - tens_in - input tensor;
! - tens_out - output tensor;
! - transp(0:*) - (optional) O2N index permutation;
! - no_check_data_kinds - (optional) if .TRUE., the two tensor blocks do not have to have the same data kinds allocated;
!OUTPUT:
! - tensor_block_compatible - .TRUE./.FALSE.;
! - ierr - error code (0:success).
!NOTES:
! - Non-allocated tensor blocks are all compatible.
! - Tensor block storage layouts are ignored.
implicit none
type(tensor_block_t), intent(in):: tens_in,tens_out
integer, intent(in), optional:: transp(0:*)
logical, intent(in), optional:: no_check_data_kinds
integer, intent(inout):: ierr
integer i,j,k,l,m,n,k0,k1,k2,k3,ks,kf
integer trn(0:max_tensor_rank)
integer(LONGINT) ls
logical chdtk
ierr=0; tensor_block_compatible=.TRUE.
if(tens_in%tensor_shape%num_dim.eq.tens_out%tensor_shape%num_dim) then
n=tens_in%tensor_shape%num_dim
if(n.gt.0) then
!Check tensor shapes:
if(associated(tens_in%tensor_shape%dim_extent).and.associated(tens_in%tensor_shape%dim_divider).and.&
&associated(tens_in%tensor_shape%dim_group).and.associated(tens_out%tensor_shape%dim_extent).and.&
&associated(tens_out%tensor_shape%dim_divider).and.associated(tens_out%tensor_shape%dim_group)) then
if(present(transp)) then; trn(0:n)=transp(0:n); else; trn(0:n)=(/+1,(j,j=1,n)/); endif
do i=1,n
if(tens_out%tensor_shape%dim_extent(trn(i)).ne.tens_in%tensor_shape%dim_extent(i).or.&
&tens_out%tensor_shape%dim_divider(trn(i)).ne.tens_in%tensor_shape%dim_divider(i).or.&
&tens_out%tensor_shape%dim_group(trn(i)).ne.tens_in%tensor_shape%dim_group(i)) then
tensor_block_compatible=.FALSE.
exit
endif
enddo
!Check data kinds:
if(tensor_block_compatible) then
if(tens_in%tensor_block_size.ne.tens_out%tensor_block_size) then
tensor_block_compatible=.FALSE.; ierr=1 !the same shape tensor blocks have different total sizes
else
if(present(no_check_data_kinds)) then; chdtk=no_check_data_kinds; else; chdtk=.FALSE.; endif
if(.not.chdtk) then
if((associated(tens_in%data_real4).and.(.not.associated(tens_out%data_real4))).or.&
&((.not.associated(tens_in%data_real4)).and.associated(tens_out%data_real4))) then
tensor_block_compatible=.FALSE.; return
else
if(associated(tens_in%data_real4)) then
ls=size(tens_in%data_real4,kind=8)
if(size(tens_out%data_real4,kind=8).ne.ls.or.tens_out%tensor_block_size.ne.ls) then
tensor_block_compatible=.FALSE.; ierr=2; return
endif
endif
endif
if((associated(tens_in%data_real8).and.(.not.associated(tens_out%data_real8))).or.&
&((.not.associated(tens_in%data_real8)).and.associated(tens_out%data_real8))) then
tensor_block_compatible=.FALSE.; return
else
if(associated(tens_in%data_real8)) then
ls=size(tens_in%data_real8,kind=8)
if(size(tens_out%data_real8,kind=8).ne.ls.or.tens_out%tensor_block_size.ne.ls) then
tensor_block_compatible=.FALSE.; ierr=3; return
endif
endif
endif
if((associated(tens_in%data_cmplx4).and.(.not.associated(tens_out%data_cmplx4))).or.&
&((.not.associated(tens_in%data_cmplx4)).and.associated(tens_out%data_cmplx4))) then
tensor_block_compatible=.FALSE.; return
else
if(associated(tens_in%data_cmplx4)) then
ls=size(tens_in%data_cmplx4,kind=8)
if(size(tens_out%data_cmplx4,kind=8).ne.ls.or.tens_out%tensor_block_size.ne.ls) then
tensor_block_compatible=.FALSE.; ierr=4; return
endif
endif
endif
if((associated(tens_in%data_cmplx8).and.(.not.associated(tens_out%data_cmplx8))).or.&
&((.not.associated(tens_in%data_cmplx8)).and.associated(tens_out%data_cmplx8))) then
tensor_block_compatible=.FALSE.; return
else
if(associated(tens_in%data_cmplx8)) then
ls=size(tens_in%data_cmplx8,kind=8)
if(size(tens_out%data_cmplx8,kind=8).ne.ls.or.tens_out%tensor_block_size.ne.ls) then
tensor_block_compatible=.FALSE.; ierr=5; return
endif
endif
endif
endif
endif
endif
else
tensor_block_compatible=.FALSE.; ierr=6 !some of the %tensor_shape arrays were not allocated
endif
endif
else
tensor_block_compatible=.FALSE.
endif
return
end function tensor_block_compatible
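!EXAMPLE: an input block shaped (3,4,5) is compatible with an output block shaped (5,3,4)
!under the O2N permutation transp=(/+1,2,3,1/), since the check requires
!out%dim_extent(transp(i))=in%dim_extent(i) for each i (dividers and groups likewise).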
!------------------------------------------------------------------
subroutine tensor_block_mimic(tens_in,tens_out,ierr,transp) !SERIAL
!This subroutine copies the internal structure of a tensor block without copying the actual data.
!Optionally, it can also initialize the tensor shape according to a given permutation (O2N).
!INPUT:
! - tens_in - tensor block being mimicked;
! - transp(0:) - (optional) if present, the tensor shape will also be initialized according to this permutation (O2N);
!OUTPUT:
! - tens_out - output tensor block;
! - ierr - error code (0:success).
!NOTES:
! - Tensor block storage layouts are ignored.
implicit none
type(tensor_block_t), intent(in):: tens_in
type(tensor_block_t), intent(inout):: tens_out
integer, intent(in), optional:: transp(0:*)
integer, intent(inout):: ierr
integer i,j,k,l,m,n,k0,k1,k2,k3,ks,kf
logical res
ierr=0
n=tens_in%tensor_shape%num_dim
if(tens_out%tensor_shape%num_dim.ne.n.or.tens_out%tensor_block_size.ne.tens_in%tensor_block_size) then
call tensor_block_destroy(tens_out,ierr); if(ierr.ne.0) then; ierr=1; return; endif
!Allocate tensor shape:
if(n.gt.0) then
allocate(tens_out%tensor_shape%dim_extent(1:n),STAT=ierr); if(ierr.ne.0) then; ierr=2; return; endif
allocate(tens_out%tensor_shape%dim_divider(1:n),STAT=ierr); if(ierr.ne.0) then; ierr=3; return; endif
allocate(tens_out%tensor_shape%dim_group(1:n),STAT=ierr); if(ierr.ne.0) then; ierr=4; return; endif
res=tensor_block_alloc(tens_out,'sp',ierr,.TRUE.); if(ierr.ne.0) then; ierr=5; return; endif
endif
tens_out%tensor_shape%num_dim=n; tens_out%tensor_block_size=tens_in%tensor_block_size
endif
if(n.gt.0) then
if(present(transp)) then !adopt the tensor shape in full according to the given permutation (O2N)
tens_out%tensor_shape%dim_extent(transp(1:n))=tens_in%tensor_shape%dim_extent(1:n)
tens_out%tensor_shape%dim_divider(transp(1:n))=tens_in%tensor_shape%dim_divider(1:n)
tens_out%tensor_shape%dim_group(transp(1:n))=tens_in%tensor_shape%dim_group(1:n)
endif
!Allocate data arrays, if needed:
!REAL4:
if(associated(tens_in%data_real4)) then
if(size(tens_in%data_real4,kind=8).eq.tens_in%tensor_block_size) then
if(.not.associated(tens_out%data_real4)) then
! allocate(tens_out%data_real4(0:tens_in%tensor_block_size-1),STAT=ierr)
ierr=array_alloc(tens_out%data_real4,tens_in%tensor_block_size,base=0_LONGINT)
if(ierr.ne.0) then; ierr=6; return; endif
res=tensor_block_alloc(tens_out,'r4',ierr,.TRUE.); if(ierr.ne.0) then; ierr=7; return; endif
endif
else
ierr=8; return
endif
endif
!REAL8:
if(associated(tens_in%data_real8)) then
if(size(tens_in%data_real8,kind=8).eq.tens_in%tensor_block_size) then
if(.not.associated(tens_out%data_real8)) then
! allocate(tens_out%data_real8(0:tens_in%tensor_block_size-1),STAT=ierr)
ierr=array_alloc(tens_out%data_real8,tens_in%tensor_block_size,base=0_LONGINT)
if(ierr.ne.0) then; ierr=9; return; endif
res=tensor_block_alloc(tens_out,'r8',ierr,.TRUE.); if(ierr.ne.0) then; ierr=10; return; endif
endif
else
ierr=11; return
endif
endif
!CMPLX4:
if(associated(tens_in%data_cmplx4)) then
if(size(tens_in%data_cmplx4,kind=8).eq.tens_in%tensor_block_size) then
if(.not.associated(tens_out%data_cmplx4)) then
! allocate(tens_out%data_cmplx4(0:tens_in%tensor_block_size-1),STAT=ierr)
ierr=array_alloc(tens_out%data_cmplx4,tens_in%tensor_block_size,base=0_LONGINT)
if(ierr.ne.0) then; ierr=12; return; endif
res=tensor_block_alloc(tens_out,'c4',ierr,.TRUE.); if(ierr.ne.0) then; ierr=13; return; endif
endif
else
ierr=14; return
endif
endif
!CMPLX8:
if(associated(tens_in%data_cmplx8)) then
if(size(tens_in%data_cmplx8,kind=8).eq.tens_in%tensor_block_size) then
if(.not.associated(tens_out%data_cmplx8)) then
! allocate(tens_out%data_cmplx8(0:tens_in%tensor_block_size-1),STAT=ierr)
ierr=array_alloc(tens_out%data_cmplx8,tens_in%tensor_block_size,base=0_LONGINT)
if(ierr.ne.0) then; ierr=15; return; endif
res=tensor_block_alloc(tens_out,'c8',ierr,.TRUE.); if(ierr.ne.0) then; ierr=16; return; endif
endif
else
ierr=17; return
endif
endif
endif
return
end subroutine tensor_block_mimic
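!EXAMPLE (a hedged usage sketch; <src> and <dst> are illustrative names):
! call tensor_block_mimic(src,dst,ierr) !<dst> acquires the shape and data-kind allocations of <src>, no data copy
! call tensor_block_mimic(src,dst,ierr,transp=(/+1,2,1/)) !same, but with a transposed (O2N-permuted) shape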
!------------------------------------------------------------------------------------------------------
subroutine tensor_block_create(shape_str,data_kind,tens_block,ierr,val_r4,val_r8,val_c4,val_c8) !PARALLEL
!This subroutine creates a tensor block <tens_block> based on the tensor shape specification string (TSSS) <shape_str>.
!FORMAT of <shape_str>:
!"(E1/D1{G1},E2/D2{G2},...)":
! Ex is the extent of the dimension x (segment);
! /Dx specifies an optional segment divider for the dimension x (lm_segment_size), 1<=Dx<=Ex (DEFAULT = Ex);
! Ex MUST be a multiple of Dx.
! {Gx} optionally specifies the symmetric group the dimension belongs to, Gx>=0 (default group 0 has no symmetry ordering).
! Dimensions grouped together (group#>0) will obey a non-descending ordering from left to right.
!By default, the 1st dimension is the most minor one while the last is the most senior (Fortran-like).
!If the number of dimensions equals zero, the %scalar_value field will be initialized.
!INPUT:
! - shape_str - tensor shape specification string (TSSS);
! - data_kind - requested data kind, one of {"r4","r8","c4","c8"};
! - tens_block - tensor block;
! - val_r4/val_r8/val_c4/val_c8 - (optional) initialization value for different data kinds (otherwise a random fill will be invoked);
!OUTPUT:
! - tens_block - filled tensor_block;
! - ierr - error code (0: success);
!NOTES:
! - If the tensor block has already been allocated before, it will be reshaped and reinitialized.
! - If ordered dimensions are present, the data feed may not reflect a proper symmetry (antisymmetry)!
implicit none
character(*), intent(in):: shape_str
character(2), intent(in):: data_kind
type(tensor_block_t), intent(inout):: tens_block
real(4), intent(in), optional:: val_r4
real(8), intent(in), optional:: val_r8
complex(4), intent(in), optional:: val_c4
complex(8), intent(in), optional:: val_c8
integer, intent(inout):: ierr
ierr=0
call tensor_block_shape_create(tens_block,shape_str,ierr); if(ierr.ne.0) then; ierr=1; return; endif
ierr=tensor_block_shape_ok(tens_block); if(ierr.ne.0) then; ierr=2; return; endif
select case(data_kind)
case('r4','R4')
if(present(val_r4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r4=val_r4); if(ierr.ne.0) then; ierr=3; return; endif
else
if(present(val_r8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r8=val_r8); if(ierr.ne.0) then; ierr=4; return; endif
else
if(present(val_c4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c4=val_c4); if(ierr.ne.0) then; ierr=5; return; endif
else
if(present(val_c8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c8=val_c8); if(ierr.ne.0) then; ierr=6; return; endif
else
call tensor_block_init(data_kind,tens_block,ierr); if(ierr.ne.0) then; ierr=7; return; endif
endif
endif
endif
endif
case('r8','R8')
if(present(val_r8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r8=val_r8); if(ierr.ne.0) then; ierr=8; return; endif
else
if(present(val_r4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r4=val_r4); if(ierr.ne.0) then; ierr=9; return; endif
else
if(present(val_c4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c4=val_c4); if(ierr.ne.0) then; ierr=10; return; endif
else
if(present(val_c8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c8=val_c8); if(ierr.ne.0) then; ierr=11; return; endif
else
call tensor_block_init(data_kind,tens_block,ierr); if(ierr.ne.0) then; ierr=12; return; endif
endif
endif
endif
endif
case('c4','C4')
if(present(val_c4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c4=val_c4); if(ierr.ne.0) then; ierr=13; return; endif
else
if(present(val_c8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c8=val_c8); if(ierr.ne.0) then; ierr=14; return; endif
else
if(present(val_r8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r8=val_r8); if(ierr.ne.0) then; ierr=15; return; endif
else
if(present(val_r4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r4=val_r4); if(ierr.ne.0) then; ierr=16; return; endif
else
call tensor_block_init(data_kind,tens_block,ierr); if(ierr.ne.0) then; ierr=17; return; endif
endif
endif
endif
endif
case('c8','C8')
if(present(val_c8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c8=val_c8); if(ierr.ne.0) then; ierr=18; return; endif
else
if(present(val_c4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_c4=val_c4); if(ierr.ne.0) then; ierr=19; return; endif
else
if(present(val_r8)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r8=val_r8); if(ierr.ne.0) then; ierr=20; return; endif
else
if(present(val_r4)) then
call tensor_block_init(data_kind,tens_block,ierr,val_r4=val_r4); if(ierr.ne.0) then; ierr=21; return; endif
else
call tensor_block_init(data_kind,tens_block,ierr); if(ierr.ne.0) then; ierr=22; return; endif
endif
endif
endif
endif
case default
ierr=23
end select
return
end subroutine tensor_block_create
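!EXAMPLES (hypothetical usage sketches; <my_tens> is an illustrative name):
! call tensor_block_create('(20,20,40)','r8',my_tens,ierr,val_r8=0d0) !dense 20x20x40 R8 block, zero-filled
! call tensor_block_create('(16/4,16/4,8)','r4',my_tens,ierr) !first two dimensions bricked (Lm=4), random fill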
!------------------------------------------------------------------------------------------
subroutine tensor_block_init(data_kind,tens_block,ierr,val_r4,val_r8,val_c4,val_c8) !PARALLEL
!This subroutine initializes a tensor block <tens_block> with either some value or random numbers.
!INPUT:
! - data_kind - requested data kind, one of {"r4","r8","c4","c8"};
! - tens_block - tensor block;
! - val_r4/val_r8/val_c4/val_c8 - (optional) if present, the tensor block is assigned the value <val> (otherwise, a random fill);
!OUTPUT:
! - tens_block - filled tensor block;
! - ierr - error code (0: success):
! -1: invalid (negative) tensor rank;
! -2: negative tensor size returned;
! x>0: invalid <tensor_shape> (zero/negative xth dimension extent);
! 666: invalid <data_kind>;
! 667: memory allocation failed;
!NOTES:
! - For tensors with a non-zero rank, the %scalar_value field will be set to the Euclidean norm of the tensor block.
! - Scalar tensors will be initialized with the <val_XX> value (if present), regardless of the <data_kind>.
! - In general, a tensor block may have dimension ordering (symmetry) restrictions.
! In this case, the number fill done here might not reflect the proper symmetry (e.g., antisymmetry)!
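!EXAMPLE (a hypothetical usage sketch): call tensor_block_init('r8',my_tens,ierr,val_r8=1d0)
!fills an existing R8 tensor block with 1.0; omitting the value triggers a random fill.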
implicit none
!-----------------------------------------------------
integer(LONGINT), parameter:: chunk_size=2**10
integer(LONGINT), parameter:: vec_size=2**8
!--------------------------------------------------
character(2), intent(in):: data_kind
type(tensor_block_t), intent(inout):: tens_block
real(4), intent(in), optional:: val_r4
real(8), intent(in), optional:: val_r8
complex(4), intent(in), optional:: val_c4
complex(8), intent(in), optional:: val_c8
integer, intent(inout):: ierr
integer i,j,k,l,m,n,k0,k1,k2,k3,k4,ks,kf
integer(LONGINT) tens_size,l0,l1
real(8) vec_r8(0:vec_size-1),valr8,val,rnd_buf(2)
real(4) vec_r4(0:vec_size-1),valr4
complex(4) vec_c4(0:vec_size-1),valc4
complex(8) vec_c8(0:vec_size-1),valc8
logical res
ierr=0; tens_block%tensor_block_size=tensor_block_shape_size(tens_block,ierr)
if(ierr.ne.0) then; ierr=1; return; endif
if(tens_block%tensor_block_size.le.0_LONGINT) then; ierr=2; return; endif
if(tens_block%tensor_shape%num_dim.eq.0) then !scalar tensor
if(associated(tens_block%data_real4)) then
if(tensor_block_alloc(tens_block,'r4',ierr)) then
if(ierr.ne.0) then; ierr=3; return; endif