fortran-caffeine-0.7.2/0000775000175000017500000000000015162256535015127 5ustar alastairalastairfortran-caffeine-0.7.2/app/0000775000175000017500000000000015162221361015674 5ustar alastairalastairfortran-caffeine-0.7.2/app/native-multi-image.F900000664000175000017500000000714315162221361021657 0ustar alastairalastair! This multi-image Fortran program just exercises basic calls into each of the ! native multi-image features from the Fortran level. ! This test requires a compiler with multi-image features (possibly via Caffeine). ! This program is NOT designed to evaluate runtime correctness, just to exercise ! some basic calls to the features. program native_multi_image #if HAVE_MULTI_IMAGE ! feature control: #ifndef HAVE_SYNC #define HAVE_SYNC 1 #endif #ifndef HAVE_SYNC_ALL #define HAVE_SYNC_ALL HAVE_SYNC #endif #ifndef HAVE_SYNC_MEMORY #define HAVE_SYNC_MEMORY HAVE_SYNC #endif #ifndef HAVE_SYNC_IMAGES #define HAVE_SYNC_IMAGES HAVE_SYNC #endif #ifndef HAVE_COLLECTIVES #define HAVE_COLLECTIVES 1 #endif #ifndef HAVE_CO_SUM #define HAVE_CO_SUM HAVE_COLLECTIVES #endif #ifndef HAVE_CO_MIN #define HAVE_CO_MIN HAVE_COLLECTIVES #endif #ifndef HAVE_CO_MAX #define HAVE_CO_MAX HAVE_COLLECTIVES #endif #ifndef HAVE_CO_BROADCAST #define HAVE_CO_BROADCAST HAVE_COLLECTIVES #endif #ifndef HAVE_TEAM #define HAVE_TEAM 1 #endif USE, INTRINSIC :: ISO_FORTRAN_ENV integer :: me, ni, peer, tmp character(len=5) :: c # if HAVE_TEAM integer :: team_id type(TEAM_TYPE) :: subteam, res # endif me = THIS_IMAGE() ni = NUM_IMAGES() peer = MIN(IEOR(me-1,1)+1, ni) write(*,'(A,I1,A,I1,A)') "Hello, world! From image ", me, " of ", ni, " images" # if SET_EXCEPTIONS block ! deliberately trigger IEEE arithmetic exceptions: INEXACT and UNDERFLOW real :: r r = 1e-30 r = r + r * r write (*,*) r end block # endif # if HAVE_SYNC_ALL call status("Testing SYNC ALL...") call sync_all # endif # if HAVE_SYNC_MEMORY call status("Testing SYNC MEMORY...") SYNC MEMORY # endif # if HAVE_SYNC_IMAGES call status("Testing SYNC IMAGES...") SYNC IMAGES(*) SYNC IMAGES(peer) SYNC IMAGES([peer]) if (me /= peer) SYNC IMAGES([me, peer]) #endif tmp = me c = "hello" # if HAVE_CO_SUM call status("Testing CO_SUM...") call CO_SUM(tmp) call CO_SUM(tmp,1) # endif # if HAVE_CO_MIN call status("Testing CO_MIN...") call CO_MIN(tmp) call CO_MIN(tmp,1) call CO_MIN(c) call CO_MIN(c,1) # endif # if HAVE_CO_MAX call status("Testing CO_MAX...") call CO_MAX(tmp) call CO_MAX(tmp,1) call CO_MAX(c) call CO_MAX(c,1) # endif # if HAVE_CO_BROADCAST call status("Testing CO_BROADCAST...") call CO_BROADCAST(tmp,1) call CO_BROADCAST(c,1) # endif # if HAVE_TEAM call status("Testing TEAMS...") res = GET_TEAM(CURRENT_TEAM) res = GET_TEAM(INITIAL_TEAM) res = GET_TEAM() write(*,'(A,I3)') "Initial team number is ", TEAM_NUMBER() team_id = merge(1, 2, me <= (ni+1)/2) FORM TEAM(team_id, subteam) SYNC TEAM(subteam) CHANGE TEAM(subteam) write(*,'(A,I3,A,I3,A,I3)') 'Inside CHANGE TEAM construct: ', THIS_IMAGE(), ' of ', NUM_IMAGES(), ' in team number ', TEAM_NUMBER() END TEAM call sync_all write(*,'(A,I3)') "After END TEAM statement, TEAM_NUMBER() is ", TEAM_NUMBER() # endif call sync_all write(*,'(A,I1,A,I1,A)') "Goodbye from image ", me, " of ", ni, " images" ! explicit flush for now until we have multi-image stop support call flush_all call sync_all stop contains subroutine sync_all # if HAVE_SYNC_ALL SYNC ALL # endif end subroutine subroutine flush_all flush output_unit flush error_unit end subroutine subroutine status(str) character(len=*) :: str call flush_all call sync_all if (THIS_IMAGE() == 1) write(*,*) str call flush_all call sync_all end subroutine #else stop "Native multi-image test disabled" #endif end program fortran-caffeine-0.7.2/src/0000775000175000017500000000000015162221361015703 5ustar alastairalastairfortran-caffeine-0.7.2/src/caffeine/0000775000175000017500000000000015162221361017443 5ustar alastairalastairfortran-caffeine-0.7.2/src/caffeine/co_broadcast_s.F900000664000175000017500000000230515162221361022670 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) co_broadcast_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_co_broadcast call_assert(source_image >= 1 .and. source_image <= current_team%info%num_images) call contiguous_co_broadcast(a, source_image, stat, errmsg, errmsg_alloc) end procedure subroutine contiguous_co_broadcast(a, source_image, stat, errmsg, errmsg_alloc) type(*), intent(inout), target, contiguous :: a(..) integer(c_int), intent(in) :: source_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc if (present(stat)) stat=0 call caf_co_broadcast(a, source_image, product(shape(a)), current_team%info%gex_team) ! With a compliant Fortran 2018 compiler, pass in c_sizeof(a) as the `Nelem` argument ! and eliminate the calculation of num_elements*sizeof(a) in caffeine.c. end subroutine end submodule co_broadcast_s fortran-caffeine-0.7.2/src/caffeine/caffeine.c0000664000175000017500000005312015162221361021350 0ustar alastairalastair// Copyright (c), The Regents of the University of California // Terms of use are as specified in LICENSE.txt #include #include #include #include #include #include #include #include #include #include #include "gasnet_safe.h" #include #include #include #include "../dlmalloc/dl_malloc_caf.h" #include "../dlmalloc/dl_malloc.h" #include "caffeine-internal.h" enum { UNRECOGNIZED_TYPE, ERRMSG_TOO_SHORT }; static gex_Client_t myclient; static gex_EP_t myep; static gex_Rank_t myproc, numprocs; static gex_Segment_t mysegment; static gex_TM_t myworldteam; typedef void(*final_func_ptr)(void*, size_t) ; typedef uint8_t byte; static void event_init(void); static void atomic_init(void); // --------------------------------------------------- // Floating-point exception support #ifndef IEEE_FE_MASK #define IEEE_FE_MASK FE_INEXACT #endif static fexcept_t fe_flag_save; void caf_fe_save(void) { fegetexceptflag(&fe_flag_save, IEEE_FE_MASK); } void caf_fe_restore(void) { fesetexceptflag(&fe_flag_save, IEEE_FE_MASK); } #define CHECK_INEXACT() \ printf("%3i: inexact flag = %s\n",__LINE__,fetestexcept(FE_INEXACT) & FE_INEXACT ? "YES" : "no") // --------------------------------------------------- int caf_this_image(gex_TM_t tm) { return gex_TM_QueryRank(tm) + 1; } int caf_num_images(gex_TM_t tm) { return gex_TM_QuerySize(tm); } // Given team and corresponding image_num, return image number in the initial team int caf_image_to_initial(gex_TM_t tm, int image_num) { assert(image_num >= 1); assert(image_num <= gex_TM_QuerySize(tm)); gex_Rank_t proc = gex_TM_TranslateRankToJobrank(tm, image_num-1); return proc + 1; } // Given image number in the initial team, return image number corresponding to given team int caf_image_from_initial(gex_TM_t tm, int image_num) { assert(image_num >= 1); assert(image_num <= numprocs); gex_Rank_t proc = gex_TM_TranslateJobrankToRank(tm, image_num-1); // GEX_RANK_INVALID indicates the provided image_num in initial team is not part of tm assert(proc != GEX_RANK_INVALID); return proc + 1; } // --------------------------------------------------- // NOTE: gex_TM_T is a typedef to a C pointer, so the `gex_TM_t* initial_team` arg in the C signature matches the BIND(C) interface of an `intent(out)` arg of type `c_ptr` for the same argument void caf_caffeinate( intptr_t* total_heap_size, mspace* symmetric_heap, intptr_t* symmetric_heap_start, intptr_t* symmetric_heap_size, mspace* non_symmetric_heap, gex_TM_t* initial_team ) { GASNET_SAFE(gex_Client_Init(&myclient, &myep, &myworldteam, "caffeine", NULL, NULL, 0)); myproc = gex_TM_QueryRank(myworldteam); numprocs = gex_TM_QuerySize(myworldteam); *initial_team = myworldteam; #define PAGE_ALIGNUP(sz) ((sz + GASNET_PAGESIZE - 1) & ~(GASNET_PAGESIZE-1)) // query largest possible segment GASNet can give us of the same size across all processes: uintptr_t max_seg = gasnet_getMaxGlobalSegmentSize(); // impose a reasonable default size #ifndef CAF_DEFAULT_HEAP_SIZE #define CAF_DEFAULT_HEAP_SIZE (128*1024*1024) // 128 MiB #endif uintptr_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE); // retrieve user preference, defaulting to the above and units of MiB uintptr_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE", default_seg, 1024*1024); // ensure at least two full pages segsz = MAX(segsz,2*GASNET_PAGESIZE); // round-up to closest page size segsz = PAGE_ALIGNUP(segsz); // cap user request to the largest available: // TODO: issue a console warning here instead of silently capping segsz = MIN(segsz,max_seg); assert(segsz % GASNET_PAGESIZE == 0); GASNET_SAFE(gex_Segment_Attach(&mysegment, myworldteam, segsz)); *symmetric_heap_start = (intptr_t)gex_Segment_QueryAddr(mysegment); *total_heap_size = gex_Segment_QuerySize(mysegment); assert(*total_heap_size >= 2*GASNET_PAGESIZE); #ifndef CAF_DEFAULT_COMP_FRAC #define CAF_DEFAULT_COMP_FRAC 0.1f // 10% #endif float default_comp_frac = MAX(MIN(0.99f, CAF_DEFAULT_COMP_FRAC), 0.01f); float non_symmetric_fraction = gasnett_getenv_dbl_withdefault("CAF_COMP_FRAC", default_comp_frac); if (non_symmetric_fraction <= 0 || non_symmetric_fraction >= 1) { gasnett_fatalerror_nopos("If used, environment variable 'CAF_COMP_FRAC' must be a valid floating point value or fraction between 0 and 1."); } uintptr_t non_symmetric_heap_size = *total_heap_size * non_symmetric_fraction; non_symmetric_heap_size = PAGE_ALIGNUP(non_symmetric_heap_size); *symmetric_heap_size = *total_heap_size - non_symmetric_heap_size; if (*symmetric_heap_size == 0) { assert(non_symmetric_heap_size > GASNET_PAGESIZE); non_symmetric_heap_size -= GASNET_PAGESIZE; *symmetric_heap_size += GASNET_PAGESIZE; } assert(non_symmetric_heap_size > 0); assert(non_symmetric_heap_size % GASNET_PAGESIZE == 0); assert(*symmetric_heap_size > 0); assert(*symmetric_heap_size % GASNET_PAGESIZE == 0); intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size; if (myproc == 0) { *symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, *symmetric_heap_size, 0); assert(*symmetric_heap); mspace_set_footprint_limit(*symmetric_heap, *symmetric_heap_size); } *non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0); assert(*non_symmetric_heap); mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size); // init various subsystems: atomic_init(); event_init(); } void caf_decaffeinate(int exit_code) { gasnet_exit(exit_code); } void caf_fail_image() { fprintf(stderr,"FAIL IMAGE on image %d\n", myproc+1); gasnett_flush_streams(); if (numprocs > 1) { // spin-wait until we are killed, while still servicing network requests: GASNET_BLOCKUNTIL((gasnett_nsleep(1000), 0)); } gasnet_exit(1); } void caf_fatal_error( const CFI_cdesc_t* Fstr ) { const char *msg = (char *)Fstr->base_addr; int len = Fstr->elem_len; //printf("%p:%i\n",msg,len); fflush(0); gasnett_fatalerror_nopos("%.*s", len, msg); } void* caf_allocate(mspace heap, size_t bytes) { void* allocated_space = mspace_memalign(heap, 8, bytes); return allocated_space; } void caf_allocate_remaining(mspace heap, void** allocated_space, size_t* allocated_size) { // The following doesn't necessarily give us all remaining space // nor necessarily the largest open space, but in practice is likely // to work out that way struct mallinfo heap_info = mspace_mallinfo(heap); // clang's implementation of nearbyint() raises FE_INEXACT, // in direct contradiction to its specified purpose. // Workaround this defect by saving and restoring the FE flags caf_fe_save(); *allocated_size = (size_t)nearbyint(heap_info.keepcost * 0.9f); caf_fe_restore(); *allocated_space = mspace_memalign(heap, 8, *allocated_size); if (!*allocated_space) // uh-oh, something went wrong.. gasnett_fatalerror("caf_allocate_remaining failed to mspace_memalign(%"PRIuSZ")", *allocated_size); } void caf_deallocate(mspace heap, void* mem) { mspace_free(heap, mem); } void caf_establish_mspace(mspace* heap, void* heap_start, size_t heap_size) { *heap = create_mspace_with_base(heap_start, heap_size, 0); mspace_set_footprint_limit(*heap, heap_size); } // take address in a segment and convert to an address on given image intptr_t caf_convert_base_addr(void* addr, int image) { ptrdiff_t offset = (byte*)addr - (byte*)gex_Segment_QueryAddr(mysegment); void* segment_start_remote_image = NULL; gex_Event_Wait(gex_EP_QueryBoundSegmentNB(myworldteam, image - 1, &segment_start_remote_image, NULL, NULL, 0)); return (intptr_t)((byte*)segment_start_remote_image + offset); } // _______________________ Contiguous RMA ____________________________ void caf_put(int image, intptr_t dest, void* src, size_t size) { gex_RMA_PutBlocking(myworldteam, image-1, (void*)dest, src, size, 0); } void caf_get(int image, void* dest, intptr_t src, size_t size) { gex_RMA_GetBlocking(myworldteam, dest, image-1, (void*)src, size, 0); } // _______________________ Strided RMA ____________________________ void caf_put_strided(int dims, int image_num, intptr_t remote_ptr, void* remote_stride, void *current_image_buffer, void * current_image_stride, size_t element_size, void *extent) { gex_VIS_StridedPutBlocking(myworldteam, image_num-1, (void *)remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, dims, 0); } void caf_get_strided(int dims, int image_num, intptr_t remote_ptr, void* remote_stride, void *current_image_buffer, void * current_image_stride, size_t element_size, void *extent) { gex_VIS_StridedGetBlocking(myworldteam, current_image_buffer, current_image_stride, image_num-1, (void *)remote_ptr, remote_stride, element_size, extent, dims, 0); } //------------------------------------------------------------------- // caf_segment_release() is invoked whenever this image is ending a // segment, to flush any pending actions that are specified to be // ordered before a subsequent segment. void caf_segment_release() { // synchronize caf_event_post: gex_NBI_Wait(GEX_EC_RMW, 0); } void caf_sync_memory() { caf_segment_release(); gasnett_local_mb(); } void caf_sync_team( gex_TM_t team ) { caf_segment_release(); gex_Event_Wait( gex_Coll_BarrierNB(team, 0) ); } // _______________________ Events ____________________________ static gex_AD_t event_AD = GEX_AD_INVALID; static void event_init(void) { assert(event_AD == GEX_AD_INVALID); // create the event AD and request CPU/AM transport gex_AD_Create(&event_AD, myworldteam, GEX_DT_I64, GEX_OP_GET | GEX_OP_INC | GEX_OP_FSUB, GEX_FLAG_AD_FAVOR_MY_RANK); assert(event_AD != GEX_AD_INVALID); } void caf_event_post(int image, intptr_t event_var_ptr, int segment_boundary, int release_fence) { assert(event_AD != GEX_AD_INVALID); assert(event_var_ptr); // arrange for requested fencing gex_Flags_t flags; if (segment_boundary) { caf_segment_release(); flags = GEX_FLAG_AD_REL | GEX_FLAG_AD_ACQ; } else if (release_fence) { flags = GEX_FLAG_AD_REL; } else { flags = 0; } gex_AD_OpNBI_I64(event_AD, NULL, image-1, (void *)event_var_ptr, GEX_OP_INC, 0, 0, flags); // We've issued the post increment as an NBI operation, // allowing this call to return before the increment // is acknowledged by the remote side. // This will later be synchronized in caf_segment_release() } void caf_event_query(void *event_var_ptr, int64_t *count) { assert(event_AD != GEX_AD_INVALID); assert(event_var_ptr); assert(count); gex_Event_Wait( gex_AD_OpNB_I64(event_AD, count, myproc, event_var_ptr, GEX_OP_GET, 0, 0, 0) ); } void caf_event_wait(void *event_var_ptr, int64_t threshold, int segment_boundary, int acquire_fence) { assert(event_AD != GEX_AD_INVALID); assert(event_var_ptr); assert(threshold >= 1); // arrange for requested fencing gex_Flags_t flags; if (segment_boundary) { caf_segment_release(); gasnett_local_wmb(); // release fence synchronously (before wait loop) flags = GEX_FLAG_AD_ACQ; } else if (acquire_fence) { flags = GEX_FLAG_AD_ACQ; } else { flags = 0; } int64_t cnt = 0; while (caf_event_query(event_var_ptr, &cnt), cnt < threshold) { // issue #222 : TODO: we probably want to insert a wait hook here gasnet_AMPoll(); } gex_Event_Wait( gex_AD_OpNB_I64(event_AD, &cnt, myproc, event_var_ptr, GEX_OP_FSUB, threshold, 0, flags) ); assert(cnt >= threshold); } // _______________________ Atomics ____________________________ #define OPMAP(name) [CAF_CONCAT2(CAF_OP_,name)] = CAF_CONCAT2(GEX_OP_,name) static gex_OP_t const op_map[] = { OPMAP(GET), OPMAP(SET), OPMAP(ADD), OPMAP(AND), OPMAP(OR), OPMAP(XOR), OPMAP(FADD), OPMAP(FAND), OPMAP(FOR), OPMAP(FXOR), OPMAP(FCAS), }; static gex_AD_t atomic_AD = GEX_AD_INVALID; static void atomic_init(void) { assert(atomic_AD == GEX_AD_INVALID); // create the atomic AD gex_AD_Create(&atomic_AD, myworldteam, GEX_DT_I64, GEX_OP_GET | GEX_OP_SET | GEX_OP_ADD | GEX_OP_FADD | GEX_OP_AND | GEX_OP_FAND | GEX_OP_OR | GEX_OP_FOR | GEX_OP_XOR | GEX_OP_FXOR | GEX_OP_FCAS, 0); // TODO: allow user control over GEX_FLAG_AD_FAVOR_* flags? assert(atomic_AD != GEX_AD_INVALID); } void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) { assert(atomic_AD != GEX_AD_INVALID); assert(addr); assert(opcode >= 0 && opcode < sizeof(op_map)/sizeof(op_map[0])); gex_OP_t op = op_map[opcode]; gex_Event_Wait( gex_AD_OpNB_I64(atomic_AD, result, image-1, addr, op, op1, op2, GEX_FLAG_RANK_IS_JOBRANK) ); // DOB: We could very easily insert memory fencing into the AMO operation above // via GEX_FLAG_AD_ACQ | GEX_FLAG_AD_REL, incurring an associated performance penalty // (most notably for same-node images communicating via shared-memory transport). // However based on my reading of the informal hand-waving in F23 C.12.1 "Atomic memory consistency", // such fencing is neither required nor guaranteed by the language. // As such we leave the AMO unfenced and rely on the fences at surrounding // memory segment boundaries to provide the required ordering semantics. } void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) { caf_atomic_int(opcode, image, addr, result, op1, op2); } //------------------------------------------------------------------- void caf_co_reduce( CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_Coll_ReduceFn_t user_op, void* client_data, gex_TM_t team ) { assert(a_desc); assert(result_image >= 0); assert(num_elements > 0); assert(user_op); #if PLATFORM_COMPILER_GNU // gfortran 13.2 & 14 - c_funloc is non-compliant // it erroneously generates a non-callable pointer to a pointer to the subroutine // Here we undo that incorrect extra level of indirection user_op = *(gex_Coll_ReduceFn_t *)user_op; #endif char* a_address = (char*) a_desc->base_addr; size_t c_sizeof_a = a_desc->elem_len; gex_Event_t ev; if (result_image) { ev = gex_Coll_ReduceToOneNB( team, result_image-1, a_address, a_address, GEX_DT_USER, c_sizeof_a, num_elements, GEX_OP_USER, user_op, client_data, 0 ); } else { ev = gex_Coll_ReduceToAllNB( team, a_address, a_address, GEX_DT_USER, c_sizeof_a, num_elements, GEX_OP_USER, user_op, client_data, 0 ); } gex_Event_Wait(ev); } void caf_co_broadcast(CFI_cdesc_t * a_desc, int source_image, int num_elements, gex_TM_t team) { char* c_loc_a = (char*) a_desc->base_addr; size_t c_sizeof_a = a_desc->elem_len; int nbytes = num_elements * c_sizeof_a; int data_type = a_desc->type; gex_Event_t ev = gex_Coll_BroadcastNB(team, source_image-1, c_loc_a, c_loc_a, nbytes, 0); gex_Event_Wait(ev); } //------------------------------------------------------------------- // Typed computational collective subroutines //------------------------------------------------------------------- // Convert CFI_type_t to the corresponding GEX reduction data type // returns the size of the native type static size_t CFI_to_GEX_DT(CFI_type_t cfi_type, gex_DT_t *gex_dt, int *complex_scale) { assert(gex_dt); if_pf (complex_scale) *complex_scale = 1; switch (cfi_type) { // real cases case CFI_type_float: *gex_dt = GEX_DT_FLT; return 4; case CFI_type_double: *gex_dt = GEX_DT_DBL; return 8; // complex cases case CFI_type_float_Complex: *gex_dt = GEX_DT_FLT; if (!complex_scale) gasnett_fatalerror("This operation does not support complex types"); *complex_scale = 2; return 8; case CFI_type_double_Complex: *gex_dt = GEX_DT_DBL; if (!complex_scale) gasnett_fatalerror("This operation does not support complex types"); *complex_scale = 2; return 16; // no support for CFI_type_long_double or CFI_type_long_double_Complex } // integer types #define CFI_INT_CASE(cfi_type_constant, c_type) \ else if (cfi_type == cfi_type_constant) { \ if (sizeof(c_type) == 4) *gex_dt = GEX_DT_I32; \ else if (sizeof(c_type) > 8) \ gasnett_fatalerror("Unsupported wide integer type: %d", (int)cfi_type); \ else *gex_dt = GEX_DT_I64; \ return sizeof(c_type); \ } // these must be handled outside the switch because there are duplicates // for the same reason, start with the most likely candidates if (0) ; CFI_INT_CASE(CFI_type_int64_t, int64_t) CFI_INT_CASE(CFI_type_int32_t, int32_t) CFI_INT_CASE(CFI_type_int16_t, int16_t) CFI_INT_CASE(CFI_type_int8_t, int8_t) CFI_INT_CASE(CFI_type_Bool, _Bool) CFI_INT_CASE(CFI_type_char, char) CFI_INT_CASE(CFI_type_signed_char, signed char) CFI_INT_CASE(CFI_type_short, short int) CFI_INT_CASE(CFI_type_int, int) CFI_INT_CASE(CFI_type_long, long int) CFI_INT_CASE(CFI_type_long_long, long long int) CFI_INT_CASE(CFI_type_size_t, size_t) CFI_INT_CASE(CFI_type_int_least8_t, int_least8_t) CFI_INT_CASE(CFI_type_int_least16_t, int_least16_t) CFI_INT_CASE(CFI_type_int_least32_t, int_least32_t) CFI_INT_CASE(CFI_type_int_least64_t, int_least64_t) CFI_INT_CASE(CFI_type_int_fast8_t, int_fast8_t) CFI_INT_CASE(CFI_type_int_fast16_t, int_fast16_t) CFI_INT_CASE(CFI_type_int_fast32_t, int_fast32_t) CFI_INT_CASE(CFI_type_int_fast64_t, int_fast64_t) CFI_INT_CASE(CFI_type_intmax_t, intmax_t) CFI_INT_CASE(CFI_type_intptr_t, intptr_t) CFI_INT_CASE(CFI_type_ptrdiff_t, ptrdiff_t) #undef CFI_INT_CASE gasnett_fatalerror("Unrecognized type: %d", (int)cfi_type); } // widen an 8- or 16-bit integer array to 64-bit static int64_t *widen_from_array(CFI_cdesc_t* a_desc, size_t num_elements) { assert(a_desc); int64_t *res = malloc(8 * num_elements); assert(res); if (a_desc->elem_len == 1) { int8_t *src = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) res[i] = src[i]; } else if (a_desc->elem_len == 2) { int16_t *src = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) res[i] = src[i]; } else gasnett_fatalerror("Logic error in widen_from_array: %i", a_desc->elem_len); return res; } // narrow a 64-bit integer array result back to 8- or 16-bit static void narrow_to_array(CFI_cdesc_t* a_desc, int64_t *src, size_t num_elements) { assert(a_desc); assert(src); if (a_desc->elem_len == 1) { int8_t *dst = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) dst[i] = src[i]; } else if (a_desc->elem_len == 2) { int16_t *dst = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) dst[i] = src[i]; } else gasnett_fatalerror("Logic error in narrow_to_array: %i", a_desc->elem_len); free(src); } GASNETT_INLINE(caf_co_common) void caf_co_common(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team, gex_OP_t g_op) { int complex_scale = 1; gex_DT_t g_dt; size_t elem_sz = CFI_to_GEX_DT(a_desc->type, &g_dt, (g_op == GEX_OP_ADD ? &complex_scale : NULL)); int64_t * bounce_buffer = NULL; void * g_addr = a_desc->base_addr; size_t g_elem_sz = a_desc->elem_len; assert(g_elem_sz == elem_sz); if_pf (complex_scale != 1) { // complex input, only permitted in prif_co_sum assert(g_op == GEX_OP_ADD); assert(complex_scale == 2); assert(g_elem_sz == 8 || g_elem_sz == 16); g_elem_sz >>= 1; num_elements <<= 1; } else if_pf(elem_sz < 4) { bounce_buffer = widen_from_array(a_desc, num_elements); assert(g_dt == GEX_DT_I64); g_elem_sz = 8; g_addr = bounce_buffer; } gex_Event_t ev; if (result_image) { ev = gex_Coll_ReduceToOneNB(team, result_image-1, g_addr, g_addr, g_dt, g_elem_sz, num_elements, g_op, NULL, NULL, 0); } else { ev = gex_Coll_ReduceToAllNB(team, g_addr, g_addr, g_dt, g_elem_sz, num_elements, g_op, NULL, NULL, 0); } gex_Event_Wait(ev); if_pf(bounce_buffer) narrow_to_array(a_desc, bounce_buffer, num_elements); } void caf_co_max(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) { caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_MAX); } void caf_co_min(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) { caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_MIN); } void caf_co_sum(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) { caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_ADD); } //------------------------------------------------------------------- void caf_form_team(gex_TM_t current_team, gex_TM_t* new_team, int64_t team_number, int new_index) { // GASNet color argument is int (32-bit), check for value truncation: assert((unsigned int)team_number == team_number); gex_TM_Split(new_team, current_team, team_number, new_index, NULL, 0, GEX_FLAG_TM_NO_SCRATCH); } fortran-caffeine-0.7.2/src/caffeine/coarray_access_s.F900000664000175000017500000003030715162221361023231 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) coarray_access_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains ! _______________________ Contiguous Put RMA ____________________________ module procedure prif_put integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_put_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & current_image_buffer = current_image_buffer, & size_in_bytes = size_in_bytes, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_indirect call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call caf_put( & image = image_num, & dest = remote_ptr, & src = current_image_buffer, & size = size_in_bytes) if (present(stat)) stat = 0 end procedure module procedure prif_put_with_notify integer(c_intptr_t) :: remote_base integer(c_intptr_t) :: notify_remote_base call_assert(offset >= 0) call_assert(notify_offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call base_pointer(notify_coarray_handle, image_num, notify_remote_base) call prif_put_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & current_image_buffer = current_image_buffer, & size_in_bytes = size_in_bytes, & notify_ptr = notify_remote_base + notify_offset, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_with_notify_indirect integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_put_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & current_image_buffer = current_image_buffer, & size_in_bytes = size_in_bytes, & notify_ptr = notify_ptr, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_indirect_with_notify integer(c_intptr_t) :: notify_remote_base call_assert(notify_offset >= 0) call base_pointer(notify_coarray_handle, image_num, notify_remote_base) call prif_put_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_ptr, & current_image_buffer = current_image_buffer, & size_in_bytes = size_in_bytes, & notify_ptr = notify_remote_base + notify_offset, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_indirect_with_notify_indirect call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call caf_put( & image = image_num, & dest = remote_ptr, & src = current_image_buffer, & size = size_in_bytes) call caf_event_post(image_num, notify_ptr, & segment_boundary=0, release_fence=1) if (present(stat)) stat = 0 end procedure ! _______________________ Contiguous Get RMA ____________________________ module procedure prif_get integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_get_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & current_image_buffer = current_image_buffer, & size_in_bytes = size_in_bytes, & stat = stat, & errmsg = errmsg, & errmsg_alloc = errmsg_alloc) end procedure module procedure prif_get_indirect call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call caf_get( & image = image_num, & dest = current_image_buffer, & src = remote_ptr, & size = size_in_bytes) if (present(stat)) stat = 0 end procedure ! _______________________ Strided Get RMA ____________________________ ! This helper ensures the metadata arrays are contiguous (RMA data may still be non-contiguous) subroutine get_strided_helper( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in), target, contiguous :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in), target, contiguous :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in), target, contiguous :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call_assert(size(remote_stride) == size(extent)) call_assert(size(current_image_stride) == size(extent)) call caf_get_strided( & dims = size(extent), & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = c_loc(remote_stride), & current_image_buffer = current_image_buffer, & current_image_stride = c_loc(current_image_stride), & element_size = element_size, & extent = c_loc(extent)) if (present(stat)) stat = 0 end subroutine module procedure prif_get_strided integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_get_strided_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & stat = stat, & errmsg = errmsg, & errmsg_alloc = errmsg_alloc) end procedure module procedure prif_get_strided_indirect call get_strided_helper( & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & stat = stat, & errmsg = errmsg, & errmsg_alloc = errmsg_alloc) end procedure ! _______________________ Strided Put RMA ____________________________ ! This helper ensures the metadata arrays are contiguous (RMA data may still be non-contiguous) subroutine put_strided_helper( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in), target, contiguous :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in), target, contiguous :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in), target, contiguous :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call_assert(size(remote_stride) == size(extent)) call_assert(size(current_image_stride) == size(extent)) call caf_put_strided( & dims = size(extent), & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = c_loc(remote_stride), & current_image_buffer = current_image_buffer, & current_image_stride = c_loc(current_image_stride), & element_size = element_size, & extent = c_loc(extent)) if (present(stat)) stat = 0 end subroutine module procedure prif_put_strided integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_put_strided_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_indirect call put_strided_helper( & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_with_notify integer(c_intptr_t) :: remote_base integer(c_intptr_t) :: notify_remote_base call_assert(offset >= 0) call_assert(notify_offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call base_pointer(notify_coarray_handle, image_num, notify_remote_base) call prif_put_strided_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & notify_ptr = notify_remote_base + notify_offset, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_with_notify_indirect integer(c_intptr_t) :: remote_base call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_put_strided_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_base + offset, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & notify_ptr = notify_ptr, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_indirect_with_notify integer(c_intptr_t) :: notify_remote_base call_assert(notify_offset >= 0) call base_pointer(notify_coarray_handle, image_num, notify_remote_base) call prif_put_strided_indirect_with_notify_indirect( & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & notify_ptr = notify_remote_base + notify_offset, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_put_strided_indirect_with_notify_indirect call put_strided_helper( & image_num = image_num, & remote_ptr = remote_ptr, & remote_stride = remote_stride, & current_image_buffer = current_image_buffer, & current_image_stride = current_image_stride, & element_size = element_size, & extent = extent, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) call caf_event_post(image_num, notify_ptr, & segment_boundary=0, release_fence=1) end procedure end submodule coarray_access_s fortran-caffeine-0.7.2/src/caffeine/caffeine-internal.h0000664000175000017500000000162015162221361023165 0ustar alastairalastair# /* Copyright (c), The Regents of the University of California */ # /* Terms of use are as specified in LICENSE.txt */ # /* NOTE: this is a dual-language header file, */ # /* and should ONLY contain portable preprocessor directives. */ # /* define some macro portability helpers */ #if defined(__GFORTRAN__) || defined(_CRAYFTN) || defined(NAGFOR) # define CAF_CONCAT2(x,y) x/**/y # define CAF_CONCAT3(x,y,z) x/**/y/**/z # define CAF_STRINGIFY_HELPER(x) "x" #else # define CAF_CONCAT2(x,y) x##y # define CAF_CONCAT3(x,y,z) x##y##z # define CAF_STRINGIFY_HELPER(x) #x #endif #define CAF_STRINGIFY(x) CAF_STRINGIFY_HELPER(x) # /* AMO support defines */ #define CAF_OP_GET 0 #define CAF_OP_SET 1 #define CAF_OP_ADD 2 #define CAF_OP_AND 3 #define CAF_OP_OR 4 #define CAF_OP_XOR 5 #define CAF_OP_FADD 6 #define CAF_OP_FAND 7 #define CAF_OP_FOR 8 #define CAF_OP_FXOR 9 #define CAF_OP_FCAS 10 fortran-caffeine-0.7.2/src/caffeine/teams_s.F900000664000175000017500000001041115162221361021353 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "language-support.F90" submodule(prif:prif_private_s) teams_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_change_team team%info%heap_start = current_team%info%child_heap_info%offset + current_team%info%heap_start team%info%heap_size = current_team%info%child_heap_info%size if (caf_this_image(team%info%gex_team) == 1) then ! need to setup the heap for the team call caf_establish_mspace( & team%info%heap_mspace, & as_c_ptr(team%info%heap_start), & current_team%info%child_heap_info%size) end if current_team = team if (caf_have_child_teams()) then ! need to establish heap for child teams call caf_establish_child_heap end if call prif_sync_all ! child team sync required by F23 11.1.5.2 if (present(stat)) stat = 0 end procedure module procedure prif_end_team type(prif_coarray_handle), allocatable :: teams_coarrays(:) integer :: num_coarrays_in_team, i type(prif_coarray_descriptor), pointer :: tmp_data ! deallocate the teams coarrays ! Currently we work to batch together all the deallocations into a single call ! to prif_deallocate_coarray(), in the hope it can amortize some costs num_coarrays_in_team = 0 tmp_data => current_team%info%coarrays do while (associated(tmp_data)) num_coarrays_in_team = num_coarrays_in_team + 1 call c_f_pointer(tmp_data%next_handle, tmp_data) end do if (num_coarrays_in_team > 0) then allocate(teams_coarrays(num_coarrays_in_team)) tmp_data => current_team%info%coarrays do i = 1, num_coarrays_in_team teams_coarrays(i)%info => tmp_data call c_f_pointer(tmp_data%next_handle, tmp_data) end do #if CAF_PRIF_VERSION <= 6 call prif_deallocate_coarray & #else call prif_deallocate_coarrays & #endif (teams_coarrays, stat, errmsg, errmsg_alloc) nullify(current_team%info%coarrays) else ! child team sync required by F23 11.1.5.2, ! because we skipped the prif_deallocate_coarray call above that includes same call prif_sync_all end if ! set the current team back to the parent team current_team%info => current_team%info%parent_team if (present(stat)) stat = 0 end procedure module procedure prif_form_team call prif_sync_memory ! indicates this is the first time we're creating a child team if (.not.caf_have_child_teams()) then allocate(current_team%info%child_heap_info) call caf_establish_child_heap end if block integer(c_int) :: new_index_ if (present(new_index)) then new_index_ = new_index else new_index_ = 1 end if ! DOB: The two allocates in this procedure do not have a corresponding deallocate, ! because Fortran lacks a destroy team operation. We consider this to represent ! a defect in the Fortran design of teams. ! As such, team-specific state such as these data structures and the corresponding ! team-related data structures in GASNet can never be reclaimed. allocate(team%info) team%info%parent_team => current_team%info call caf_form_team(current_team%info%gex_team, team%info%gex_team, team_number, new_index_) team%info%team_number = team_number team%info%this_image = caf_this_image(team%info%gex_team) team%info%num_images = caf_num_images(team%info%gex_team) end block if (present(stat)) stat = 0 end procedure module procedure prif_get_team if (.not. present(level)) then team = current_team else if (level == PRIF_CURRENT_TEAM) then team = current_team else if (level == PRIF_PARENT_TEAM) then team = prif_team_type(current_team%info%parent_team) else if (level == PRIF_INITIAL_TEAM) then team = prif_team_type(initial_team) else call prif_error_stop(.false._c_bool, stop_code_char="prif_get_team: invalid level") endif end procedure module procedure prif_team_number if (present(team)) then team_number = team%info%team_number else team_number = current_team%info%team_number endif end procedure end submodule fortran-caffeine-0.7.2/src/caffeine/prif_private_s.F900000664000175000017500000004662215162221361022751 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif) prif_private_s use assert_m use iso_fortran_env, only : & output_unit, & error_unit use iso_c_binding, only: & c_associated, & c_f_pointer, & c_f_procpointer, & c_funloc, & c_int32_t, & c_loc, & c_null_funptr, & c_sizeof, & ! DOB: The following is a gfortran-14 bug workaround. No idea why this works... c_funptr_ => c_funptr implicit none type(prif_team_descriptor), target :: initial_team type(prif_team_type) :: current_team type(c_ptr) :: non_symmetric_heap_mspace integer(c_intptr_t) :: total_heap_size, non_symmetric_heap_size interface ! ________ Program initiation and finalization ___________ subroutine caf_caffeinate( & total_heap_size, & symmetric_heap, & symmetric_heap_start, & symmetric_heap_size, & non_symmetric_heap, & initial_team) & bind(C) import c_ptr, c_intptr_t implicit none integer(c_intptr_t), intent(out) :: total_heap_size, symmetric_heap_start, symmetric_heap_size type(c_ptr), intent(out) :: symmetric_heap, non_symmetric_heap type(c_ptr), intent(out) :: initial_team end subroutine subroutine caf_decaffeinate(exit_code) bind(C) !! void c_decaffeinate(); import c_int implicit none integer(c_int), value :: exit_code end subroutine subroutine caf_fail_image() bind(C) !! void caf_fail_image(); implicit none end subroutine pure subroutine caf_fatal_error(str) bind(C) !! void caf_fatal_error( const CFI_cdesc_t* Fstr ) import c_char implicit none character(kind=c_char,len=:), pointer, intent(in) :: str end subroutine ! _________________ Image enumeration ____________________ function caf_this_image(gex_team) bind(C) !! int caf_this_image(gex_TM_t gex_team); import c_ptr, c_int implicit none type(c_ptr), value :: gex_team integer(c_int) caf_this_image end function pure function caf_num_images(gex_team) bind(C) !! int caf_num_images(gex_TM_t gex_team); import c_ptr, c_int implicit none type(c_ptr), value :: gex_team integer(c_int) caf_num_images end function function caf_image_to_initial(gex_team, image_num) bind(C) !! int caf_image_to_initial(gex_TM_t tm, int image_num) import c_ptr, c_int implicit none type(c_ptr), value :: gex_team integer(c_int), value :: image_num integer(c_int) caf_image_to_initial end function function caf_image_from_initial(gex_team, image_num) bind(C) !! int caf_image_from_initial(gex_TM_t tm, int image_num) import c_ptr, c_int implicit none type(c_ptr), value :: gex_team integer(c_int), value :: image_num integer(c_int) caf_image_from_initial end function ! _________________ Memory allocation ____________________ function caf_allocate(mspace, bytes) result(ptr) bind(c) import c_size_t, c_ptr implicit none type(c_ptr), intent(in), value :: mspace integer(c_size_t), intent(in), value :: bytes type(c_ptr) :: ptr end function subroutine caf_allocate_remaining(mspace, allocated_space, allocated_size) bind(c) import c_size_t, c_ptr implicit none type(c_ptr), intent(in), value :: mspace type(c_ptr), intent(out) :: allocated_space integer(c_size_t), intent(out) :: allocated_size end subroutine subroutine caf_deallocate(mspace, mem) bind(c) import c_ptr implicit none type(c_ptr), intent(in), value :: mspace type(c_ptr), intent(in), value :: mem end subroutine subroutine caf_establish_mspace(mspace, mem, mem_size) bind(c) import c_size_t, c_ptr implicit none type(c_ptr), intent(out) :: mspace type(c_ptr), intent(in), value :: mem integer(c_size_t), intent(in), value :: mem_size end subroutine ! ___________________ PRIF Queries ______________________ module function caf_convert_base_addr(addr, image) result(ptr) bind(c) implicit none type(c_ptr), intent(in), value :: addr integer(c_int), intent(in), value :: image integer(c_intptr_t) :: ptr end function ! _______________________ Contiguous RMA ____________________________ subroutine caf_put(image, dest, src, size) bind(c) !! void caf_put(int image, intptr_t dest, void* src, size_t size) import c_ptr, c_int, c_intptr_t, c_size_t implicit none integer(c_int), intent(in), value :: image integer(c_intptr_t), intent(in), value :: dest type(c_ptr), intent(in), value :: src integer(c_size_t), intent(in), value :: size end subroutine subroutine caf_get(image, dest, src, size) bind(c) !! void caf_get(int image, void* dest, intptr_t src, size_t size) import c_ptr, c_int, c_intptr_t, c_size_t implicit none integer(c_int), intent(in), value :: image type(c_ptr), intent(in), value :: dest integer(c_intptr_t), intent(in), value :: src integer(c_size_t), intent(in), value :: size end subroutine ! _______________________ Strided RMA ____________________________ subroutine caf_put_strided(dims, image_num, remote_ptr, remote_stride, & current_image_buffer, current_image_stride, & element_size, extent) bind(c) !! void caf_put_strided(int dims, int image_num, !! intptr_t remote_ptr, void* remote_stride, !! void *current_image_buffer, void * current_image_stride, !! size_t element_size, void *extent) import c_ptr, c_int, c_intptr_t, c_size_t implicit none integer(c_int), intent(in), value :: dims integer(c_int), intent(in), value :: image_num integer(c_intptr_t), intent(in), value :: remote_ptr type(c_ptr), intent(in), value :: remote_stride type(c_ptr), intent(in), value :: current_image_buffer type(c_ptr), intent(in), value :: current_image_stride integer(c_size_t), intent(in), value :: element_size type(c_ptr), intent(in), value :: extent end subroutine subroutine caf_get_strided(dims, image_num, remote_ptr, remote_stride, & current_image_buffer, current_image_stride, & element_size, extent) bind(c) !! void caf_get_strided(int dims, int image_num, !! intptr_t remote_ptr, void* remote_stride, !! void *current_image_buffer, void * current_image_stride, !! size_t element_size, void *extent) import c_ptr, c_int, c_intptr_t, c_size_t implicit none integer(c_int), intent(in), value :: dims integer(c_int), intent(in), value :: image_num integer(c_intptr_t), intent(in), value :: remote_ptr type(c_ptr), intent(in), value :: remote_stride type(c_ptr), intent(in), value :: current_image_buffer type(c_ptr), intent(in), value :: current_image_stride integer(c_size_t), intent(in), value :: element_size type(c_ptr), intent(in), value :: extent end subroutine ! __________________ SYNC Statements _____________________ module subroutine sync_init() end subroutine subroutine caf_sync_memory() bind(C) !! void caf_sync_memory(); end subroutine subroutine caf_sync_team(team) bind(C) !! void caf_sync_team(gex_TM_t team); import c_ptr implicit none type(c_ptr), value :: team end subroutine ! _______________________ Events ____________________________ subroutine caf_event_post(image, event_var_ptr, segment_boundary, release_fence) bind(c) !! void caf_event_post(int image, intptr_t event_var_ptr, int segment_boundary, int release_fence) import c_int, c_intptr_t implicit none integer(c_int), intent(in), value :: image integer(c_intptr_t), intent(in), value :: event_var_ptr integer(c_int), intent(in), value :: segment_boundary integer(c_int), intent(in), value :: release_fence end subroutine subroutine caf_event_wait(event_var_ptr, threshold, segment_boundary, acquire_fence) bind(c) !! void caf_event_wait(void *event_var_ptr, int64_t threshold, int segment_boundary, int acquire_fence) import c_int64_t, c_ptr, c_int implicit none type(c_ptr), intent(in), value :: event_var_ptr integer(c_int64_t), intent(in), value :: threshold integer(c_int), intent(in), value :: segment_boundary integer(c_int), intent(in), value :: acquire_fence end subroutine subroutine caf_event_query(event_var_ptr, count) bind(c) !! void caf_event_query(void *event_var_ptr, int64_t *count) import c_int64_t, c_ptr implicit none type(c_ptr), intent(in), value :: event_var_ptr integer(c_int64_t), intent(out) :: count end subroutine ! _______________________ Atomics ____________________________ subroutine caf_atomic_int(opcode, image, addr, result, operand1, operand2) bind(c) !! void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) import c_int, c_intptr_t, PRIF_ATOMIC_INT_KIND implicit none integer(c_int), intent(in), value :: opcode integer(c_int), intent(in), value :: image integer(c_intptr_t), intent(in), value :: addr integer(PRIF_ATOMIC_INT_KIND), intent(out) :: result integer(PRIF_ATOMIC_INT_KIND), intent(in), value :: operand1 integer(PRIF_ATOMIC_INT_KIND), intent(in), value :: operand2 end subroutine subroutine caf_atomic_logical(opcode, image, addr, result, operand1, operand2) bind(c) !! void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) import c_int, c_intptr_t, PRIF_ATOMIC_LOGICAL_KIND implicit none integer(c_int), intent(in), value :: opcode integer(c_int), intent(in), value :: image integer(c_intptr_t), intent(in), value :: addr logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: result logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in), value :: operand1 logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in), value :: operand2 end subroutine ! ______________ Collective Subroutines __________________ subroutine caf_co_broadcast(a, source_image, Nelem, team) bind(C) !! void c_co_broadcast(CFI_cdesc_t * a_desc, int source_image, int num_elements, gex_TM_t team); import c_int, c_ptr implicit none type(*) a(..) integer(c_int), value :: source_image, Nelem type(c_ptr), value :: team end subroutine subroutine caf_co_reduce(a, result_image, num_elements, Coll_ReduceSub, client_data, team) bind(C) !! void caf_co_reduce(CFI_cdesc_t* a_desc, int result_image, int num_elements, gex_Coll_ReduceFn_t* user_op, void* client_data) import c_int, c_ptr, c_size_t, c_funptr implicit none type(*) a(..) integer(c_int), value :: result_image type(c_ptr), value :: client_data type(c_funptr), value :: Coll_ReduceSub integer(c_size_t), value :: num_elements type(c_ptr), value :: team end subroutine subroutine caf_co_sum(a, result_image, num_elements, team) bind(C) !! void c_co_sum(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team); import c_int, c_ptr, c_size_t implicit none type(*) a(..) integer(c_int), value :: result_image integer(c_size_t), value :: num_elements type(c_ptr), value :: team end subroutine subroutine caf_co_min(a, result_image, num_elements, team) bind(C) !! void c_co_min(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team); import c_int, c_ptr, c_size_t implicit none type(*) a(..) integer(c_int), value :: result_image integer(c_size_t), value :: num_elements type(c_ptr), value :: team end subroutine subroutine caf_co_max(a, result_image, num_elements, team) bind(C) !! void c_co_max(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team); import c_int, c_ptr, c_size_t implicit none type(*) a(..) integer(c_int), value :: result_image integer(c_size_t), value :: num_elements type(c_ptr), value :: team end subroutine subroutine caf_form_team(current_team, new_team, team_number, new_index) bind(C) !! void caf_form_team(gex_TM_t* current_team, gex_TM_t* new_team, int64_t team_number, int new_index); import c_ptr, c_int, c_int64_t type(c_ptr), intent(in), value :: current_team type(c_ptr), intent(out) :: new_team integer(c_int64_t), intent(in), value :: team_number integer(c_int), intent(in), value :: new_index end subroutine end interface interface num_to_str module procedure num_to_str32 module procedure num_to_str64 end interface contains pure function num_to_str32(num, is_mem_size) result(str) integer(c_int32_t), value :: num logical, intent(in), optional :: is_mem_size character(len=:), allocatable :: str str = num_to_str64(int(num, c_int64_t), is_mem_size) end function pure function num_to_str64(num, is_mem_size) result(str) integer(c_int64_t), value :: num logical, intent(in), optional :: is_mem_size character(len=:), allocatable :: str, unit character(len=40) num_str integer(c_int64_t) :: divisor if (present(is_mem_size)) then if (is_mem_size) then divisor = 1 ! Try to strike a compromise between digits and round off # define CAF_USE_DIV(div, unit_str) \ if ((num .ge. 10*div) .or. (num .ge. div .and. mod(num, div) == 0)) then ; \ divisor = div; unit = unit_str; exit; \ end if do CAF_USE_DIV(ishft(1_c_int64_t,40), " TiB") CAF_USE_DIV(ishft(1_c_int64_t,30), " GiB") CAF_USE_DIV(ishft(1_c_int64_t,20), " MiB") CAF_USE_DIV(ishft(1_c_int64_t,10), " KiB") CAF_USE_DIV(1_c_int64_t, " B"); exit end do num = num / divisor # undef CAF_USE_DIV end if end if write(num_str, '(i0)') num str = trim(adjustl(num_str)) if (allocated(unit)) then str = str // unit end if end function pure function as_int(ptr) type(c_ptr), intent(in) :: ptr integer(c_intptr_t) :: as_int call_assert(storage_size(ptr) == storage_size(as_int)) as_int = transfer(ptr, as_int) end function pure function as_c_ptr(i) integer(c_intptr_t), intent(in) :: i type(c_ptr) :: as_c_ptr as_c_ptr = transfer(i, as_c_ptr) end function subroutine base_pointer(coarray_handle, image_num, ptr) type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(out) :: ptr call_assert(coarray_handle_check(coarray_handle)) call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "base_pointer: image_num not within valid range") ptr = caf_convert_base_addr(coarray_handle%info%coarray_data, image_num) end subroutine subroutine unimplemented(proc_name) character(len=*), intent(in) :: proc_name call prif_error_stop(quiet=.false._c_bool, stop_code_char=proc_name // " is not yet implemented") end subroutine pure function optional_value(var) result(c_val) integer, intent(in), optional :: var integer(c_int) c_val if (present(var)) then c_val = var else c_val = 0_c_int end if end function pure subroutine compute_coshape_epp(lcobounds, ucobounds, coshape_epp) !! Compute the exclusive prefix product of the coshape for the given cobounds integer(c_int64_t), intent(in) :: lcobounds(:), ucobounds(:) integer(c_int), intent(out) :: coshape_epp(:) integer(c_int64_t) :: product integer :: d associate (corank => size(lcobounds)) call_assert(corank > 0) call_assert(size(coshape_epp) == corank) call_assert(size(ucobounds) == corank .or. size(ucobounds) == corank-1) coshape_epp(1) = 1 product = 1 do d = 2, corank product = product * (ucobounds(d-1) - lcobounds(d-1) + 1) call_assert_describe(product < huge(0_c_int), "Overflow in cobounds. product(coshape(a)) must be < 2 billion") coshape_epp(d) = int(product, c_int) end do end associate end subroutine ! Report the provided error stat/msg using the provided optional stat/errmsg args subroutine report_error(report_stat, report_msg, stat, errmsg, errmsg_alloc) integer(c_int), intent(in) :: report_stat character(len=*), intent(in) :: report_msg integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable , optional :: errmsg_alloc call_assert(report_stat /= 0) if (.not. present(stat)) then call prif_error_stop(.false._c_bool, stop_code_char=report_msg) else stat = report_stat if (present(errmsg)) then errmsg = report_msg else if (present(errmsg_alloc)) then errmsg_alloc = report_msg end if end if end subroutine ! verify state invariants for a coarray_handle ! Note this function validates invariants with deliberately UNconditional assertions ! Suggested caller usage for conditional validation is: ! call_assert(coarray_handle_check(coarray_handle)) elemental impure function coarray_handle_check(coarray_handle) result(result_) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle logical :: result_ integer(c_int) :: i, epp(15) call assert_always(associated(coarray_handle%info), "unassociated info pointer in prif_coarray_handle") associate(info => coarray_handle%info, corank => coarray_handle%info%corank) call assert_always(corank >= 1, "invalid corank in prif_coarray_handle") call assert_always(corank <= size(info%lcobounds), "invalid corank in prif_coarray_handle") call assert_always(all([(info%lcobounds(i) <= info%ucobounds(i), i = 1, corank-1)]), & "invalid cobounds in prif_coarray_handle") call assert_always(info%coarray_size > 0, "invalid data size in prif_coarray_handle") call assert_always(c_associated(info%coarray_data), "invalid data pointer in prif_coarray_handle") call compute_coshape_epp(info%lcobounds(1:corank),info%ucobounds(1:corank-1),epp(1:corank)) call assert_always(all(info%coshape_epp(1:corank) == epp(1:corank)), & "invalid coshape_epp in prif_coarray_handle") end associate result_ = .true. end function subroutine caf_establish_child_heap if (current_team%info%this_image == 1) then call caf_allocate_remaining( & current_team%info%heap_mspace, & current_team%info%child_heap_info%allocated_memory, & current_team%info%child_heap_info%size) current_team%info%child_heap_info%offset = & as_int(current_team%info%child_heap_info%allocated_memory) - current_team%info%heap_start end if call prif_co_broadcast(current_team%info%child_heap_info, 1) end subroutine logical function caf_have_child_teams() caf_have_child_teams = associated(current_team%info%child_heap_info) end function end submodule prif_private_s fortran-caffeine-0.7.2/src/caffeine/co_sum_s.F900000664000175000017500000000213215162221361021530 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) co_sum_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_co_sum if (present(result_image)) then call_assert(result_image >= 1 .and. result_image <= current_team%info%num_images) endif call contiguous_co_sum(a, result_image, stat, errmsg, errmsg_alloc) end procedure subroutine contiguous_co_sum(a, result_image, stat, errmsg, errmsg_alloc) type(*), intent(inout), target, contiguous :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc if (present(stat)) stat=0 call caf_co_sum( & a, optional_value(result_image), int(product(shape(a)), c_size_t), current_team%info%gex_team) end subroutine end submodule co_sum_s fortran-caffeine-0.7.2/src/caffeine/co_min_s.F900000664000175000017500000000451515162221361021516 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) co_min_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_co_min if (present(result_image)) then call_assert(result_image >= 1 .and. result_image <= current_team%info%num_images) endif call contiguous_co_min(a, result_image, stat, errmsg, errmsg_alloc) end procedure subroutine contiguous_co_min(a, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target, contiguous :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc if (present(stat)) stat=0 call caf_co_min( & a, & optional_value(result_image), & int(product(shape(a)), c_size_t), & current_team%info%gex_team) end subroutine subroutine char_min_wrapper(arg1, arg2_and_out, count, cdata) bind(C) type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata integer(c_size_t), pointer :: char_len integer(c_size_t) :: i if (count == 0) return call c_f_pointer(cdata, char_len) block character(len=char_len,kind=c_char), pointer :: lhs(:), rhs_and_result(:) call c_f_pointer(arg1, lhs, [count]) call c_f_pointer(arg2_and_out, rhs_and_result, [count]) do i = 1, count if (lhs(i) <= rhs_and_result(i)) rhs_and_result(i) = lhs(i) end do end block end subroutine module procedure prif_co_min_character integer(c_size_t), target :: char_len procedure(prif_operation_wrapper_interface), pointer :: op char_len = len(a) op => char_min_wrapper #if defined(__GFORTRAN__) && 0 ! gfortran 13.2 (sometimes?) crashes on the call below ! internal compiler error: in make_decl_rtl, at varasm.cc:1442 call unimplemented("prif_co_min_character") #else call prif_co_reduce(a, op, c_loc(char_len), result_image, stat, errmsg, errmsg_alloc) #endif end procedure end submodule co_min_s fortran-caffeine-0.7.2/src/caffeine/events_s.F900000664000175000017500000000350315162221361021552 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) events_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_event_post integer(c_intptr_t) :: remote_base call_assert(coarray_handle_check(coarray_handle)) call_assert(offset >= 0) call base_pointer(coarray_handle, image_num, remote_base) call prif_event_post_indirect( & image_num = image_num, & event_var_ptr = remote_base + offset, & stat = stat, errmsg = errmsg, errmsg_alloc = errmsg_alloc) end procedure module procedure prif_event_post_indirect call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range") call caf_event_post(image_num, event_var_ptr, & segment_boundary=1, release_fence=1) if (present(stat)) stat = 0 end procedure module procedure prif_event_wait integer(c_int64_t) :: threshold if (present(until_count)) then threshold = MAX(until_count, 1) else threshold = 1 endif call caf_event_wait(event_var_ptr, threshold, & segment_boundary=1, acquire_fence=1) if (present(stat)) stat = 0 end procedure module procedure prif_event_query call caf_event_query(event_var_ptr, count) if (present(stat)) stat = 0 end procedure module procedure prif_notify_wait integer(c_int64_t) :: threshold if (present(until_count)) then threshold = MAX(until_count, 1) else threshold = 1 endif call caf_event_wait(notify_var_ptr, threshold, & segment_boundary=0, acquire_fence=1) if (present(stat)) stat = 0 end procedure end submodule events_s fortran-caffeine-0.7.2/src/caffeine/atomic_s.F900000664000175000017500000000577115162221361021533 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" #include "caffeine-internal.h" submodule(prif:prif_private_s) atomic_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none ! placeholder variables that simplify the macro logic below integer(PRIF_ATOMIC_INT_KIND) :: out_placeholder_int logical(PRIF_ATOMIC_LOGICAL_KIND) :: out_placeholder_logical integer(PRIF_ATOMIC_INT_KIND), parameter :: in_placeholder_int = 0 logical(PRIF_ATOMIC_LOGICAL_KIND), parameter :: in_placeholder_logical = .false. contains #define ATOMIC_OP(OPNAME, OPCODE, caf_op) \ module procedure CAF_CONCAT2(prif_atomic_,OPNAME) ; \ integer(c_intptr_t) :: remote_base; \ call_assert(offset >= 0); \ call base_pointer(coarray_handle, image_num, remote_base); \ call CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) \ ( image_num, remote_base + offset, OPPASSF, stat ); \ end procedure ; \ module procedure CAF_CONCAT3(prif_atomic_,OPNAME,_indirect) ; \ call_assert(c_sizeof(out_placeholder_int) == 8); call_assert(c_sizeof(out_placeholder_logical) == 8); \ call_assert_describe(image_num > 0 .and. image_num <= initial_team%num_images, "image_num not within valid range"); \ call caf_op(CAF_CONCAT2(CAF_OP_,OPCODE), image_num, atom_remote_ptr, OPPASSC); \ if (present(stat)) stat = 0; \ end procedure ! Extra arg below is another workaround gfortran's sub-standard preprocessor #define ATOMIC_INT_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_int) #define ATOMIC_LOG_OP(OPNAME,_,OPCODE) ATOMIC_OP(OPNAME,OPCODE,caf_atomic_logical) ! OPPASSF defines the dummy argument pass-thru in Fortran, ! from the direct module procedure to the indirect variant #undef OPPASSF #define OPPASSF value ! OPPASSC defines the dummy argument pass-thru to C, ! from the indirect module procedure to the BIND(C) call #undef OPPASSC #define OPPASSC value, in_placeholder_int, in_placeholder_int ATOMIC_INT_OP(ref_int, ,GET) #undef OPPASSC #define OPPASSC value, in_placeholder_logical, in_placeholder_logical ATOMIC_LOG_OP(ref_logical, ,GET) #undef OPPASSC #define OPPASSC out_placeholder_logical, value, in_placeholder_logical ATOMIC_LOG_OP(define_logical, ,SET) #undef OPPASSC #define OPPASSC out_placeholder_int, value, in_placeholder_int ATOMIC_INT_OP(define_int, ,SET) ATOMIC_INT_OP(add, ,ADD) ATOMIC_INT_OP(and, ,AND) ATOMIC_INT_OP(or, ,OR) ATOMIC_INT_OP(xor, ,XOR) #undef OPPASSF #define OPPASSF value, old #undef OPPASSC #define OPPASSC old, value, in_placeholder_int ATOMIC_INT_OP(fetch_add, ,FADD) ATOMIC_INT_OP(fetch_and, ,FAND) ATOMIC_INT_OP(fetch_or, ,FOR) ATOMIC_INT_OP(fetch_xor, ,FXOR) #undef OPPASSF #define OPPASSF old, compare, new #undef OPPASSC #define OPPASSC old, compare, new ATOMIC_INT_OP(cas_int, ,FCAS) ATOMIC_LOG_OP(cas_logical, ,FCAS) end submodule atomic_s fortran-caffeine-0.7.2/src/caffeine/co_reduce_s.F900000664000175000017500000000404615162221361022201 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) co_reduce_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module subroutine prif_co_reduce(a, operation_wrapper, cdata, result_image, stat, errmsg, errmsg_alloc) type(*), intent(inout), target :: a(..) procedure(prif_operation_wrapper_interface), pointer, intent(in) :: operation_wrapper type(c_ptr), intent(in), value :: cdata integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc if (present(result_image)) then call_assert(result_image >= 1 .and. result_image <= current_team%info%num_images) endif call_assert_describe(associated(operation_wrapper), "prif_co_reduce: associated(operation_wrapper)") call contiguous_co_reduce(a, operation_wrapper, cdata, result_image, stat, errmsg, errmsg_alloc) end subroutine subroutine contiguous_co_reduce(a, operation_wrapper, cdata, result_image, stat, errmsg, errmsg_alloc) type(*), intent(inout), target, contiguous :: a(..) procedure(prif_operation_wrapper_interface), pointer, intent(in) :: operation_wrapper type(c_ptr), intent(in), value :: cdata integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc type(c_funptr) :: funptr if (present(stat)) stat=0 funptr = c_funloc(operation_wrapper) call_assert(c_associated(funptr)) call caf_co_reduce( & a, & optional_value(result_image), & int(product(shape(a)), c_size_t), & funptr, & cdata, & current_team%info%gex_team) end subroutine end submodule co_reduce_s fortran-caffeine-0.7.2/src/caffeine/gasnet_safe.h0000664000175000017500000000203015162221361022066 0ustar alastairalastair// Copyright (c), The Regents of the University of California // Terms of use are as specified in LICENSE.txt #ifndef _GASNET_SAFE_ #define _GASNET_SAFE_ #include /* Macro to check return codes and terminate with useful message. */ #define GASNET_SAFE(fncall) do { \ int _retval; \ if ((_retval = fncall) != GASNET_OK) { \ fprintf(stderr, "ERROR calling: %s\n" \ " at: %s:%i\n" \ " error: %s (%s)\n", \ #fncall, __FILE__, __LINE__, \ gasnet_ErrorName(_retval), gasnet_ErrorDesc(_retval)); \ fflush(stderr); \ gasnet_exit(_retval); \ } \ } while(0) #endif fortran-caffeine-0.7.2/src/caffeine/allocation_s.F900000664000175000017500000002447215162221361022403 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" #include "language-support.F90" submodule(prif:prif_private_s) allocation_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_allocate_coarray ! TODO: determining the size of the handle and where the coarray begins ! becomes a bit more complicated if we don't allocate space for ! 15 cobounds integer :: me type(c_ptr) :: whole_block integer(c_ptrdiff_t) :: block_offset integer(c_size_t) :: descriptor_size, total_size integer(c_int) :: corank type(prif_coarray_descriptor) :: unused type(prif_coarray_descriptor), pointer :: unused2(:) corank = size(lcobounds) call_assert(corank > 0) if (size(ucobounds) == corank) then call_assert(all(lcobounds <= ucobounds)) call_assert(product(ucobounds - lcobounds + 1) >= current_team%info%num_images) else call_assert(size(ucobounds) == corank - 1) call_assert(all(lcobounds(1:corank-1) <= ucobounds)) end if me = current_team%info%this_image if (caf_have_child_teams()) then ! Free the child team space to make sure we have space to allocate the coarray if (me == 1) then call caf_deallocate(current_team%info%heap_mspace, current_team%info%child_heap_info%allocated_memory) end if end if if (me == 1) then descriptor_size = c_sizeof(unused) total_size = descriptor_size + size_in_bytes whole_block = caf_allocate(current_team%info%heap_mspace, total_size) if (.not. c_associated(whole_block)) then block_offset = -1 ! out of memory else block_offset = as_int(whole_block) - current_team%info%heap_start end if else block_offset = 0 end if call prif_sync_memory ! end the current segment ! Use a co_sum to aggregate broadcasing the information from image 1 ! together with the team barrier spec-required by coarray allocation call prif_co_sum(block_offset) if (block_offset == -1) then ! out of memory - abort allocation attempt call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .true.), & stat, errmsg, errmsg_alloc) if (caf_have_child_teams()) then ! unroll state change above before return call caf_establish_child_heap end if return end if if (me /= 1) whole_block = as_c_ptr(current_team%info%heap_start + block_offset) call c_f_pointer(whole_block, coarray_handle%info) call c_f_pointer(whole_block, unused2, [2]) coarray_handle%info%coarray_data = c_loc(unused2(2)) coarray_handle%info%corank = corank coarray_handle%info%coarray_size = size_in_bytes coarray_handle%info%final_func = final_func coarray_handle%info%lcobounds(1:corank) = lcobounds coarray_handle%info%ucobounds(1:corank-1) = ucobounds(1:corank-1) call compute_coshape_epp(lcobounds, ucobounds, coarray_handle%info%coshape_epp(1:corank)) # if ASSERTIONS ! The following entries are dead, but initialize them to help detect defects coarray_handle%info%lcobounds(corank+1:15) = huge(0_c_int64_t) coarray_handle%info%ucobounds(corank:14) = -huge(0_c_int64_t) coarray_handle%info%coshape_epp(corank+1:15) = 0 # endif coarray_handle%info%previous_handle = c_null_ptr coarray_handle%info%next_handle = c_null_ptr call add_to_team_list(coarray_handle) coarray_handle%info%reserved = c_null_ptr coarray_handle%info%p_context_data = c_loc(coarray_handle%info%reserved) allocated_memory = coarray_handle%info%coarray_data if (caf_have_child_teams()) then call caf_establish_child_heap end if call_assert(coarray_handle_check(coarray_handle)) end procedure module procedure prif_allocate type(c_ptr) :: mem mem = caf_allocate(non_symmetric_heap_mspace, size_in_bytes) if (.not. c_associated(mem)) then call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .false.), & stat, errmsg, errmsg_alloc) else allocated_memory = mem end if end procedure function out_of_memory_message(size_in_bytes, symmetric) result(message) integer(c_size_t), intent(in) :: size_in_bytes logical, intent(in) :: symmetric character(len=:), allocatable :: mem_type character(len=:), allocatable :: message message = "Fortran shared heap is out of memory" if (symmetric) then mem_type = "coarray" else message = message // " on image " // num_to_str(initial_team%this_image) mem_type = "non-coarray" end if message = message // new_line('') & // " while allocating " // num_to_str(size_in_bytes, .true.) // " of additional " & // mem_type // " memory." // new_line('') & // new_line('') & // " Shared heap size information:" // new_line('') & // " Total shared heap: " // pad(num_to_str(total_heap_size, .true.)) & // " (CAF_HEAP_SIZE)" // new_line('') & // " Total non-coarray heap: " // pad(num_to_str(non_symmetric_heap_size, .true.)) & // " (CAF_COMP_FRAC * CAF_HEAP_SIZE)" // new_line('') & // " Current team coarray heap: " // pad(num_to_str(current_team%info%heap_size, .true.)) // new_line('') & // new_line('') & // " Consider setting the CAF_HEAP_SIZE environment variable to request a larger heap." contains function pad(str) result(s) character(len=*), intent(in) :: str character(len=:), allocatable :: s s = str s = repeat(' ',max(0, 10 - len(str))) // s end function end function #if CAF_PRIF_VERSION <= 6 module procedure prif_deallocate_coarray #else module procedure prif_deallocate_coarray call prif_deallocate_coarrays([coarray_handle], stat, errmsg, errmsg_alloc) end procedure module procedure prif_deallocate_coarrays #endif integer :: i, num_handles type(prif_coarray_handle), target :: coarray_handle # if HAVE_FINAL_FUNC_SUPPORT abstract interface subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C) import c_int, prif_coarray_handle implicit none type(prif_coarray_handle), pointer, intent(in) :: handle integer(c_int), intent(out) :: stat character(len=:), intent(out), allocatable :: errmsg end subroutine end interface procedure(coarray_cleanup_i), pointer :: coarray_cleanup integer(c_int) :: local_stat character(len=:), allocatable :: local_errmsg #endif call prif_sync_all ! Need to ensure we don't deallocate anything till everyone gets here num_handles = size(coarray_handles) if (.not. all([(associated(coarray_handles(i)%info), i = 1, num_handles)])) then call report_error(CAF_STAT_INVALID_ARGUMENT, "Attempted to deallocate unallocated coarray", & stat, errmsg, errmsg_alloc) return end if call_assert(all(coarray_handle_check(coarray_handles))) ! invoke finalizers from coarray_handles(:)%info%final_func do i = 1, num_handles coarray_handle = coarray_handles(i) ! Add target attribute if (c_associated(coarray_handle%info%final_func)) then # if HAVE_FINAL_FUNC_SUPPORT call c_f_procpointer(coarray_handle%info%final_func, coarray_cleanup) call coarray_cleanup(coarray_handle, local_stat, local_errmsg) call prif_co_max(local_stat) ! Need to be sure it didn't fail on any images if (local_stat /= 0) then if (.not. allocated(local_errmsg)) then ! provide a default errmsg local_errmsg = "coarray_cleanup finalization callback failed" end if call report_error(local_stat, local_errmsg, & stat, errmsg, errmsg_alloc) return ! NOTE: We no longer have guarantees that coarrays are in consistent state end if # else ! TODO: issue a warning that we are ignoring the final_func? # endif end if end do do i = 1, num_handles call remove_from_team_list(coarray_handles(i)) if (current_team%info%this_image == 1) & call caf_deallocate(current_team%info%heap_mspace, c_loc(coarray_handles(i)%info)) end do if (present(stat)) stat = 0 if (caf_have_child_teams()) then ! reclaim any free space possible for the child teams to use if (current_team%info%this_image == 1) then call caf_deallocate(current_team%info%heap_mspace, current_team%info%child_heap_info%allocated_memory) end if call caf_establish_child_heap end if end procedure module procedure prif_deallocate call caf_deallocate(non_symmetric_heap_mspace, mem) if (present(stat)) stat = 0 end procedure subroutine add_to_team_list(coarray_handle) type(prif_coarray_handle), intent(in) :: coarray_handle call_assert(.not.c_associated(coarray_handle%info%previous_handle)) call_assert(.not.c_associated(coarray_handle%info%next_handle)) if (associated(current_team%info%coarrays)) then current_team%info%coarrays%previous_handle = c_loc(coarray_handle%info) coarray_handle%info%next_handle = c_loc(current_team%info%coarrays) end if current_team%info%coarrays => coarray_handle%info end subroutine subroutine remove_from_team_list(coarray_handle) type(prif_coarray_handle), intent(in) :: coarray_handle type(prif_coarray_descriptor), pointer :: tmp_data if ( .not.c_associated(coarray_handle%info%previous_handle) & .and. .not.c_associated(coarray_handle%info%next_handle)) then call_assert(associated(current_team%info%coarrays, coarray_handle%info)) nullify(current_team%info%coarrays) return end if if (c_associated(coarray_handle%info%previous_handle)) then call c_f_pointer(coarray_handle%info%previous_handle, tmp_data) tmp_data%next_handle = coarray_handle%info%next_handle else call_assert(associated(current_team%info%coarrays, coarray_handle%info)) call c_f_pointer(coarray_handle%info%next_handle, current_team%info%coarrays) end if if (c_associated(coarray_handle%info%next_handle)) then call c_f_pointer(coarray_handle%info%next_handle, tmp_data) tmp_data%previous_handle = coarray_handle%info%previous_handle end if end subroutine end submodule allocation_s fortran-caffeine-0.7.2/src/caffeine/locks_s.F900000664000175000017500000000142315162221361021360 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) locks_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_lock call unimplemented("prif_lock") if (present(stat)) stat = 0 end procedure module procedure prif_lock_indirect call unimplemented("prif_lock_indirect") if (present(stat)) stat = 0 end procedure module procedure prif_unlock call unimplemented("prif_unlock") if (present(stat)) stat = 0 end procedure module procedure prif_unlock_indirect call unimplemented("prif_unlock_indirect") if (present(stat)) stat = 0 end procedure end submodule locks_s fortran-caffeine-0.7.2/src/caffeine/alias_s.F900000664000175000017500000000500415162221361021335 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" #include "language-support.F90" submodule(prif:prif_private_s) alias_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_alias_create integer(c_int) :: corank ! validate inputs call_assert(coarray_handle_check(source_handle)) corank = size(alias_lcobounds) call_assert(corank > 0) if (size(alias_ucobounds) == corank) then call_assert(all(alias_lcobounds <= alias_ucobounds)) call_assert(product(alias_ucobounds - alias_lcobounds + 1) >= current_team%info%num_images) else call_assert(size(alias_ucobounds) == corank - 1) call_assert(all(alias_lcobounds(1:corank-1) <= alias_ucobounds)) end if allocate(alias_handle%info) ! start with a copy of the source descriptor alias_handle%info = source_handle%info # if CAF_PRIF_VERSION >= 6 alias_handle%info%coarray_data = & as_c_ptr(as_int(alias_handle%info%coarray_data) + data_pointer_offset) # endif ! apply provided cobounds alias_handle%info%corank = corank alias_handle%info%lcobounds(1:corank) = alias_lcobounds alias_handle%info%ucobounds(1:corank-1) = alias_ucobounds(1:corank-1) call compute_coshape_epp(alias_lcobounds, alias_ucobounds, & alias_handle%info%coshape_epp(1:corank)) # if ASSERTIONS ! The following entries are dead, but initialize them to help detect defects alias_handle%info%lcobounds(corank+1:15) = huge(0_c_int64_t) alias_handle%info%ucobounds(corank:14) = -huge(0_c_int64_t) alias_handle%info%coshape_epp(corank+1:15) = 0 # endif ! reset some fields that are unused in aliases alias_handle%info%reserved = c_null_ptr alias_handle%info%previous_handle = c_null_ptr alias_handle%info%next_handle = c_null_ptr alias_handle%info%final_func = c_null_funptr call_assert(coarray_handle_check(alias_handle)) end procedure module procedure prif_alias_destroy type(prif_coarray_descriptor), pointer :: info call_assert(coarray_handle_check(alias_handle)) info => alias_handle%info call_assert(.not. c_associated(info%reserved)) call_assert(.not. c_associated(info%previous_handle)) call_assert(.not. c_associated(info%next_handle)) call_assert(.not. c_associated(info%final_func)) deallocate(info) end procedure end submodule alias_s fortran-caffeine-0.7.2/src/caffeine/critical_s.F900000664000175000017500000000100215162221361022030 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) critical_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_critical call unimplemented("prif_critical") if (present(stat)) stat = 0 end procedure module procedure prif_end_critical call unimplemented("prif_end_critical") end procedure end submodule critical_s fortran-caffeine-0.7.2/src/caffeine/image_queries_s.F900000664000175000017500000000623015162221361023065 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) image_queries_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_num_images num_images = current_team%info%num_images end procedure module procedure prif_num_images_with_team num_images = team%info%num_images end procedure module procedure prif_num_images_with_team_number if (team_number == -1) then num_images = initial_team%num_images else if (team_number == current_team%info%team_number) then num_images = current_team%info%num_images else call unimplemented("prif_num_images_with_team_number: no support for sibling teams") end if end procedure module procedure prif_this_image_no_coarray if (present(team)) then this_image = team%info%this_image else this_image = current_team%info%this_image endif end procedure module procedure prif_this_image_with_coarray integer(c_int) :: offset, doff, dsz integer :: dim call_assert(coarray_handle_check(coarray_handle)) call_assert(size(cosubscripts) == coarray_handle%info%corank) if (present(team)) then offset = team%info%this_image - 1 else offset = current_team%info%this_image - 1 endif associate (info => coarray_handle%info) call_assert(size(cosubscripts) == info%corank) do dim = 1, info%corank-1 dsz = INT(info%ucobounds(dim) - info%lcobounds(dim) + 1, c_int) doff = mod(offset, dsz) cosubscripts(dim) = doff + info%lcobounds(dim) call_assert(cosubscripts(dim) <= info%ucobounds(dim)) offset = offset / dsz end do cosubscripts(info%corank) = offset + info%lcobounds(info%corank) end associate # if ASSERTIONS block ! sanity check integer(c_int) :: image_index if (present(team)) then call prif_image_index_with_team(coarray_handle, cosubscripts, team, image_index) call_assert(image_index == team%info%this_image) else call prif_image_index(coarray_handle, cosubscripts, image_index) call_assert(image_index == current_team%info%this_image) end if end block # endif end procedure module procedure prif_this_image_with_dim call_assert(coarray_handle_check(coarray_handle)) block integer(c_int64_t) :: cosubscripts(coarray_handle%info%corank) call_assert(dim >= 1 .and. dim <= coarray_handle%info%corank) call prif_this_image_with_coarray(coarray_handle, team, cosubscripts) cosubscript = cosubscripts(dim) end block end procedure module procedure prif_failed_images ! no current support for detecting image failure allocate(failed_images(0)) end procedure module procedure prif_stopped_images ! no current support for detecting image stops allocate(stopped_images(0)) end procedure module procedure prif_image_status ! no current support for detecting image failure/stops image_status = 0 end procedure end submodule image_queries_s fortran-caffeine-0.7.2/src/caffeine/program_startup_s.F900000664000175000017500000000422015162221361023474 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) program_startup_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_init use ieee_arithmetic, only: ieee_inexact, ieee_set_flag logical, save :: prif_init_called_previously = .false. if (prif_init_called_previously) then stat = PRIF_STAT_ALREADY_INIT else call caf_caffeinate( & total_heap_size, & initial_team%heap_mspace, & initial_team%heap_start, & initial_team%heap_size, & non_symmetric_heap_mspace, & initial_team%gex_team) call assert_init() current_team%info => initial_team initial_team%parent_team => initial_team initial_team%team_number = -1 initial_team%this_image = caf_this_image(initial_team%gex_team) initial_team%num_images = caf_num_images(initial_team%gex_team) non_symmetric_heap_size = total_heap_size - initial_team%heap_size call sync_init() ! issue #259: Ensure we clear any IEEE FP exceptions potentially ! signalled from within the C-based initialization code above call ieee_set_flag(ieee_inexact, .false.) prif_init_called_previously = .true. stat = 0 end if end procedure #if ASSERT_PARALLEL_CALLBACKS subroutine assert_init() implicit none assert_this_image => assert_callback_this_image assert_error_stop => assert_callback_error_stop end subroutine pure function assert_callback_this_image() result(this_image_id) implicit none integer :: this_image_id this_image_id = initial_team%this_image end function pure subroutine assert_callback_error_stop(stop_code_char) implicit none character(len=*), intent(in) :: stop_code_char character(len=:), allocatable, target :: tmp tmp = stop_code_char call caf_fatal_error(tmp) end subroutine #else subroutine assert_init() end subroutine #endif end submodule program_startup_s fortran-caffeine-0.7.2/src/caffeine/unit_test_parameters_m.F900000664000175000017500000000445115162221361024504 0ustar alastairalastair! Copyright (c), The Regents of the University ! Terms of use are as specified in LICENSE.txt module unit_test_parameters_m use iso_c_binding, only: c_int use prif, only: prif_sync_all, prif_this_image_no_coarray !! Define values and utilities for consistent use throughout the test suite implicit none public integer(c_int), parameter :: expected_stop_code=99, expected_error_stop_code=100 ! used in stop/error-stop unit tests and example/test-support supporting programs character(len=:), allocatable :: subjob_prefix character(len=:), allocatable :: fpm_driver contains ! Retrieve an environment parameter or its default value subroutine getenv_withdefault(key, default, result) use iso_fortran_env, only: error_unit character(len=*), intent(in) :: key, default character(len=:), allocatable, intent(inout) :: result character(len=:), allocatable :: suffix character :: dummy integer :: len ! TODO: it would be preferable to consult the GASNet global environment, when available call get_environment_variable(key, dummy, len) if (len > 0) then allocate(character(len=len)::result) call get_environment_variable(key, result, len) result = trim(adjustl(result)) suffix = "" else result = default suffix = " (default)" endif ! report the envvar in verbose mode call get_environment_variable("GASNET_VERBOSEENV", dummy, len) if (len > 0) then write(error_unit, '(A, T64, A)') "ENV parameter: "//key//"='"//result//"'", suffix end if end subroutine ! subjob support used by stop/error-stop unit tests ! setup for subjob launch, initializes subjob_prefix and ! returns whether this is the first image function subjob_setup() result(result_) logical :: result_ integer :: me if (.not. allocated(subjob_prefix)) then call getenv_withdefault("SUBJOB_PREFIX", "", subjob_prefix) if (len(subjob_prefix) > 0) subjob_prefix = subjob_prefix//" " end if if (.not. allocated(fpm_driver)) then call getenv_withdefault("FPM_DRIVER", "./run-fpm.sh", fpm_driver) end if call prif_sync_all() call prif_this_image_no_coarray(this_image=me) result_ = (me == 1) .and. (trim(subjob_prefix) /= "skip") end function end module unit_test_parameters_m fortran-caffeine-0.7.2/src/caffeine/sync_stmt_s.F900000664000175000017500000000705415162221361022276 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) sync_stmt_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none ! Data structures used to implement prif_sync_images type(prif_coarray_handle) :: si_coarray_handle type(prif_event_type), pointer :: si_evt(:) integer(c_size_t) :: sizeof_event contains module procedure prif_sync_all call caf_sync_team(current_team%info%gex_team) if (present(stat)) stat = 0 end procedure module procedure prif_sync_team call caf_sync_team(team%info%gex_team) if (present(stat)) stat = 0 end procedure module procedure prif_sync_memory call caf_sync_memory if (present(stat)) stat = 0 end procedure module procedure sync_init ! Create the array coarray used to implement prif_sync_images ! This following is effectively: ! type(EVENT_TYPE), allocatable :: si_evt(:)[*] ! ALLOCATE( si_evt(NUM_IMAGES()) ) type(prif_event_type) :: dummy_event type(c_ptr) :: allocated_memory associate(num_imgs => initial_team%num_images) sizeof_event = int(storage_size(dummy_event)/8, c_size_t) call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [int(num_imgs,c_int64_t)], & size_in_bytes = sizeof_event * num_imgs, & final_func = c_null_funptr, & coarray_handle = si_coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, si_evt, [num_imgs]) si_evt = dummy_event ! default initialize end associate end procedure module procedure prif_sync_images integer(c_int) :: i, img, l, u integer(c_intptr_t) :: evt_ptr call_assert(coarray_handle_check(si_coarray_handle)) call caf_sync_memory ! end segment and amortize release fence associate(num_imgs => current_team%info%num_images) if (present(image_set)) then l = lbound(image_set,1) u = ubound(image_set,1) # if ASSERTIONS block ! input validation logical p(num_imgs) p = .false. do i = l,u call_assert(image_set(i) >= 1 .and. image_set(i) <= num_imgs) call_assert_describe(.not. p(image_set(i)), "image indices in SYNC IMAGES are not distinct!") p(image_set(i)) = .true. end do end block # endif else ! SYNC IMAGES (*) l = 1 u = num_imgs endif end associate ! post an event to each peer in my slot do i=l,u if (present(image_set)) then img = image_set(i) else img = i endif img = caf_image_to_initial( current_team%info%gex_team, img ) call base_pointer(si_coarray_handle, img, evt_ptr) evt_ptr = evt_ptr + sizeof_event * (initial_team%this_image - 1) call caf_event_post(img, evt_ptr, & segment_boundary=0, release_fence=0) end do ! reap an event from each peer in its slot ! final iteration issues acquire fence do i=l,u if (present(image_set)) then img = image_set(i) else img = i endif img = caf_image_to_initial( current_team%info%gex_team, img ) call caf_event_wait(c_loc(si_evt(img)), 1_c_int64_t, & segment_boundary=0, & acquire_fence=merge(1,0,i==u)) end do if (present(stat)) stat = 0 end procedure end submodule fortran-caffeine-0.7.2/src/caffeine/co_max_s.F900000664000175000017500000000451515162221361021520 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) co_max_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_co_max if (present(result_image)) then call_assert(result_image >= 1 .and. result_image <= current_team%info%num_images) endif call contiguous_co_max(a, result_image, stat, errmsg, errmsg_alloc) end procedure subroutine contiguous_co_max(a, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target, contiguous :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc if (present(stat)) stat=0 call caf_co_max( & a, & optional_value(result_image), & int(product(shape(a)), c_size_t), & current_team%info%gex_team) end subroutine subroutine char_max_wrapper(arg1, arg2_and_out, count, cdata) bind(C) type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata integer(c_size_t), pointer :: char_len integer(c_size_t) :: i if (count == 0) return call c_f_pointer(cdata, char_len) block character(len=char_len,kind=c_char), pointer :: lhs(:), rhs_and_result(:) call c_f_pointer(arg1, lhs, [count]) call c_f_pointer(arg2_and_out, rhs_and_result, [count]) do i = 1, count if (lhs(i) >= rhs_and_result(i)) rhs_and_result(i) = lhs(i) end do end block end subroutine module procedure prif_co_max_character integer(c_size_t), target :: char_len procedure(prif_operation_wrapper_interface), pointer :: op char_len = len(a) op => char_max_wrapper #if defined(__GFORTRAN__) && 0 ! gfortran 13.2 (sometimes?) crashes on the call below ! internal compiler error: in make_decl_rtl, at varasm.cc:1442 call unimplemented("prif_co_max_character") #else call prif_co_reduce(a, op, c_loc(char_len), result_image, stat, errmsg, errmsg_alloc) #endif end procedure end submodule co_max_s fortran-caffeine-0.7.2/src/caffeine/program_termination_s.F900000664000175000017500000001110315162221361024321 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) program_termination_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none type :: callback_entry procedure(prif_stop_callback_interface), pointer, nopass :: callback type(callback_entry), pointer :: next => null() end type type(callback_entry), pointer :: callback_list => null() contains ! Do our best to portably flush anything that might be buffered in the Fortran I/O library subroutine flush_all() flush output_unit flush error_unit end subroutine module procedure prif_register_stop_callback type(callback_entry), pointer :: new_entry allocate(new_entry) new_entry%callback => callback if (associated(callback_list)) then new_entry%next => callback_list end if callback_list => new_entry end procedure module procedure prif_stop call flush_all() call prif_sync_all call run_callbacks(.false._c_bool, quiet, stop_code_int, stop_code_char) if (present(stop_code_char)) then call prif_stop_character(quiet, stop_code_char) else call prif_stop_integer(quiet, stop_code_int) end if contains subroutine prif_stop_integer(quiet, stop_code) !! synchronize, stop the executing image, and provide the stop_code, or 0 if not present, as the process exit status logical(c_bool), intent(in) :: quiet integer(c_int), intent(in), optional :: stop_code integer(c_int) :: exit_code if (present(stop_code)) then if (.not. quiet) then write(output_unit, '(A, I0)') "STOP ", stop_code end if exit_code = stop_code else if (.not. quiet) then write(output_unit, '(A)') "STOP" end if exit_code = 0_c_int end if call flush_all() call caf_decaffeinate(exit_code) end subroutine prif_stop_integer subroutine prif_stop_character(quiet, stop_code) !! synchronize, stop the executing image, and provide the stop_code as the process exit status logical(c_bool), intent(in) :: quiet character(len=*), intent(in) :: stop_code if (.not. quiet) then write(output_unit, '(A)') "STOP '" // stop_code // "'" end if call flush_all() call caf_decaffeinate(exit_code=0_c_int) ! does not return end subroutine prif_stop_character end procedure prif_stop module procedure prif_error_stop call flush_all() call run_callbacks(.true._c_bool, quiet, stop_code_int, stop_code_char) if (present(stop_code_char)) then call prif_error_stop_character(quiet, stop_code_char) else call prif_error_stop_integer(quiet, stop_code_int) end if end procedure prif_error_stop subroutine prif_error_stop_character(quiet, stop_code) !! stop all images and provide the stop_code as the process exit status logical(c_bool), intent(in) :: quiet character(len=*), intent(in) :: stop_code if (.not. quiet) then write(error_unit, '(A)') "ERROR STOP '" // stop_code // "'" end if call flush_all() call caf_decaffeinate(1_c_int) ! does not return end subroutine subroutine prif_error_stop_integer(quiet, stop_code) !! stop all images and provide the stop_code, or 1 if not present, as the process exit status logical(c_bool), intent(in) :: quiet integer(c_int), intent(in), optional :: stop_code integer(c_int) :: exit_code if (present(stop_code)) then if (.not.quiet) then write(error_unit,'(A, I0)') "ERROR STOP ", stop_code end if exit_code = stop_code else if (.not.quiet) then write(error_unit,'(A)') "ERROR STOP" end if exit_code = 1_c_int end if call flush_all() call caf_decaffeinate(exit_code) ! does not return end subroutine module procedure prif_fail_image # ifndef CAF_FAIL_IMAGE_SUPPRESS_FLUSH call flush_all() # endif call caf_fail_image() end procedure subroutine run_callbacks(is_error_stop, quiet, stop_code_int, stop_code_char) logical(c_bool), intent(in) :: is_error_stop, quiet integer(c_int), intent(in), optional :: stop_code_int character(len=*), intent(in), optional :: stop_code_char type(callback_entry), pointer :: next_entry next_entry => callback_list do while (associated(next_entry)) call next_entry%callback(is_error_stop, quiet, stop_code_int, stop_code_char) next_entry => next_entry%next end do end subroutine end submodule program_termination_s fortran-caffeine-0.7.2/src/caffeine/coarray_queries_s.F900000664000175000017500000001746315162221361023455 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "assert_macros.h" submodule(prif:prif_private_s) coarray_queries_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 implicit none contains module procedure prif_lcobound_with_dim call_assert(coarray_handle_check(coarray_handle)) call_assert(dim >= 1 .and. dim <= coarray_handle%info%corank) lcobound = coarray_handle%info%lcobounds(dim) end procedure module procedure prif_lcobound_no_dim call_assert(coarray_handle_check(coarray_handle)) call_assert(size(lcobounds) == coarray_handle%info%corank) lcobounds = coarray_handle%info%lcobounds(1:coarray_handle%info%corank) end procedure module procedure prif_ucobound_with_dim call_assert(coarray_handle_check(coarray_handle)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) call_assert(dim >= 1 .and. dim <= corank) if (corank == 1) then ! common-case optimization ucobound = info%lcobounds(1) + current_team%info%num_images - 1 elseif (dim < corank) then ucobound = info%ucobounds(dim) else ! compute trailing ucobound, based on current team size call_assert(dim == corank) associate (epp => info%coshape_epp(corank), num_imgs => current_team%info%num_images) if (epp >= num_imgs) then ! optimization to skip a divide ucobound = info%lcobounds(corank) else ucobound = info%lcobounds(corank) + (num_imgs + epp - 1) / epp - 1 end if end associate end if end associate end procedure module procedure prif_ucobound_no_dim call_assert(coarray_handle_check(coarray_handle)) call_assert(size(ucobounds) == coarray_handle%info%corank) associate (corank => coarray_handle%info%corank) ucobounds(1:corank-1) = coarray_handle%info%ucobounds(1:corank-1) call prif_ucobound_with_dim(coarray_handle, corank, ucobounds(corank)) end associate end procedure module procedure prif_coshape integer(c_int64_t) :: trailing_ucobound call_assert(coarray_handle_check(coarray_handle)) call_assert(size(sizes) == coarray_handle%info%corank) associate(info => coarray_handle%info, corank => coarray_handle%info%corank) if (corank == 1) then ! common-case optimization sizes(1) = current_team%info%num_images else sizes(1:corank-1) = info%ucobounds(1:corank-1) - info%lcobounds(1:corank-1) + 1 associate (epp => info%coshape_epp(corank), num_imgs => current_team%info%num_images) if (epp >= num_imgs) then ! optimization to skip a divide sizes(corank) = 1 else sizes(corank) = (num_imgs + epp - 1) / epp end if end associate end if end associate end procedure subroutine image_index_helper(coarray_handle, sub, num_images, image_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) integer(c_int), intent(in) :: num_images integer(c_int), intent(out) :: image_index integer :: dim call_assert(coarray_handle_check(coarray_handle)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) call_assert(size(sub) == corank) if (sub(1) .lt. info%lcobounds(1) .or. & (corank > 1 .and. sub(1) .gt. info%ucobounds(1))) then image_index = 0 return end if image_index = 1 + INT(sub(1) - info%lcobounds(1), c_int) do dim = 2, size(sub) if (sub(dim) .lt. info%lcobounds(dim) .or. & (dim < corank .and. sub(dim) .gt. info%ucobounds(dim))) then image_index = 0 return end if image_index = image_index + INT(sub(dim) - info%lcobounds(dim), c_int) * info%coshape_epp(dim) end do end associate if (image_index .gt. num_images) then image_index = 0 end if end subroutine module procedure prif_image_index call image_index_helper(coarray_handle, sub, current_team%info%num_images, image_index) end procedure module procedure prif_image_index_with_team call image_index_helper(coarray_handle, sub, team%info%num_images, image_index) end procedure module procedure prif_image_index_with_team_number if (team_number == -1) then call image_index_helper(coarray_handle, sub, initial_team%num_images, image_index) else if (team_number == current_team%info%team_number) then call image_index_helper(coarray_handle, sub, current_team%info%num_images, image_index) else call unimplemented("prif_image_index_with_team_number: no support for sibling teams") end if end procedure !--------------------------------------------------------------------- subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) type(prif_team_type), intent(in) :: team integer(c_int), intent(out) :: initial_team_index integer :: dim integer(c_int) :: image_index call_assert(coarray_handle_check(coarray_handle)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) call_assert(size(sub) == corank) call_assert(sub(1) .ge. info%lcobounds(1) .and. (corank == 1 .or. sub(1) .le. info%ucobounds(1))) image_index = 1 + INT(sub(1) - info%lcobounds(1), c_int) do dim = 2, size(sub) call_assert(sub(dim) .ge. info%lcobounds(dim) .and. (dim == corank .or. sub(dim) .le. info%ucobounds(dim))) image_index = image_index + INT(sub(dim) - info%lcobounds(dim), c_int) * info%coshape_epp(dim) end do end associate call_assert(image_index .le. team%info%num_images) initial_team_index = caf_image_to_initial(team%info%gex_team, image_index) call_assert(initial_team_index .ge. 1 .and. initial_team_index .le. initial_team%num_images) end subroutine module procedure prif_initial_team_index call initial_index_helper(coarray_handle, sub, current_team, initial_team_index) if (present(stat)) stat = 0 end procedure module procedure prif_initial_team_index_with_team call initial_index_helper(coarray_handle, sub, team, initial_team_index) if (present(stat)) stat = 0 end procedure module procedure prif_initial_team_index_with_team_number if (team_number == -1) then call initial_index_helper(coarray_handle, sub, prif_team_type(initial_team), initial_team_index) else if (team_number == current_team%info%team_number) then call initial_index_helper(coarray_handle, sub, current_team, initial_team_index) else call unimplemented("prif_initial_team_index_with_team_number: no support for sibling teams") end if if (present(stat)) stat = 0 end procedure !--------------------------------------------------------------------- module procedure prif_local_data_pointer call_assert(coarray_handle_check(coarray_handle)) local_data = coarray_handle%info%coarray_data end procedure module procedure prif_set_context_data type(c_ptr), pointer :: array_context_data call_assert(coarray_handle_check(coarray_handle)) call c_f_pointer(coarray_handle%info%p_context_data, array_context_data) array_context_data = context_data end procedure module procedure prif_get_context_data type(c_ptr), pointer :: array_context_data call_assert(coarray_handle_check(coarray_handle)) call c_f_pointer(coarray_handle%info%p_context_data, array_context_data) context_data = array_context_data end procedure module procedure prif_size_bytes call_assert(coarray_handle_check(coarray_handle)) data_size = coarray_handle%info%coarray_size end procedure end submodule coarray_queries_s fortran-caffeine-0.7.2/src/dlmalloc/0000775000175000017500000000000015162221361017472 5ustar alastairalastairfortran-caffeine-0.7.2/src/dlmalloc/dl_malloc_caf.h0000664000175000017500000000316415162221361022406 0ustar alastairalastair#ifndef _485f7f27_ce8a_4829_a04c_aaa8182adab9 #define _485f7f27_ce8a_4829_a04c_aaa8182adab9 // Added for caffeine: #define ONLY_MSPACES 1 #if CAFI_ASSERT_ENABLED #define DEBUG 1 #else #undef DEBUG #endif /* * Added for caffeine. This block of defines name shifts dlmalloc functions to have * a cafi_ prefix. Since dlmalloc is a commonly used library, name clashes can * occur when two libraries that use dlmalloc are linked to the same application * causing linker errors as they both define the dlmalloc symbols. */ #define create_mspace cafi_create_mspace #define create_mspace_with_base cafi_create_mspace_with_base #define destroy_mspace cafi_destroy_mspace #define mspace_bulk_free cafi_mspace_bulk_free #define mspace_calloc cafi_mspace_calloc #define mspace_footprint cafi_mspace_footprint #define mspace_footprint_limit cafi_mspace_footprint_limit #define mspace_free cafi_mspace_free #define mspace_independent_calloc cafi_mspace_independent_calloc #define mspace_independent_comalloc cafi_mspace_independent_comalloc #define mspace_mallinfo cafi_mspace_mallinfo #define mspace_malloc cafi_mspace_malloc #define mspace_malloc_stats cafi_mspace_malloc_stats #define mspace_mallopt cafi_mspace_mallopt #define mspace_max_footprint cafi_mspace_max_footprint #define mspace_memalign cafi_mspace_memalign #define mspace_realloc cafi_mspace_realloc #define mspace_realloc_in_place cafi_mspace_realloc_in_place #define mspace_set_footprint_limit cafi_mspace_set_footprint_limit #define mspace_track_large_chunks cafi_mspace_track_large_chunks #define mspace_trim cafi_mspace_trim #define mspace_usable_size cafi_mspace_usable_size #endif fortran-caffeine-0.7.2/src/dlmalloc/dl_malloc.c0000664000175000017500000066062415162221361021602 0ustar alastairalastair/* * Added for caffeine, so that the name shift #defines at the top of * dl_malloc_caf.h also mangle the function names in this file. */ #include "dl_malloc_caf.h" /* This is a version (aka dlmalloc) of malloc/free/realloc written by Doug Lea and released to the public domain, as explained at http://creativecommons.org/publicdomain/zero/1.0/ Send questions, comments, complaints, performance data, etc to dl@cs.oswego.edu * Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea Note: There may be an updated version of this malloc obtainable at ftp://gee.cs.oswego.edu/pub/misc/malloc.c Check before installing! * Quickstart This library is all in one file to simplify the most common usage: ftp it, compile it (-O3), and link it into another program. All of the compile-time options default to reasonable values for use on most platforms. You might later want to step through various compile-time and dynamic tuning options. For convenience, an include file for code using this malloc is at: ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h You don't really need this .h file unless you call functions not defined in your system include files. The .h file contains only the excerpts from this file needed for using this malloc on ANSI C/C++ systems, so long as you haven't changed compile-time options about naming and tuning parameters. If you do, then you can create your own malloc.h that does include all settings by cutting at the point indicated below. Note that you may already by default be using a C library containing a malloc that is based on some version of this malloc (for example in linux). You might still want to use the one in this file to customize settings or to avoid overheads associated with library versions. * Vital statistics: Supported pointer/size_t representation: 4 or 8 bytes size_t MUST be an unsigned type of the same width as pointers. (If you are using an ancient system that declares size_t as a signed type, or need it to be a different width than pointers, you can use a previous release of this malloc (e.g. 2.7.2) supporting these.) Alignment: 8 bytes (minimum) This suffices for nearly all current machines and C compilers. However, you can define MALLOC_ALIGNMENT to be wider than this if necessary (up to 128bytes), at the expense of using more space. Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) 8 or 16 bytes (if 8byte sizes) Each malloced chunk has a hidden word of overhead holding size and status information, and additional cross-check word if FOOTERS is defined. Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) 8-byte ptrs: 32 bytes (including overhead) Even a request for zero bytes (i.e., malloc(0)) returns a pointer to something of the minimum allocatable size. The maximum overhead wastage (i.e., number of extra bytes allocated than were requested in malloc) is less than or equal to the minimum size, except for requests >= mmap_threshold that are serviced via mmap(), where the worst case wastage is about 32 bytes plus the remainder from a system page (the minimal mmap unit); typically 4096 or 8192 bytes. Security: static-safe; optionally more or less The "security" of malloc refers to the ability of malicious code to accentuate the effects of errors (for example, freeing space that is not currently malloc'ed or overwriting past the ends of chunks) in code that calls malloc. This malloc guarantees not to modify any memory locations below the base of heap, i.e., static variables, even in the presence of usage errors. The routines additionally detect most improper frees and reallocs. All this holds as long as the static bookkeeping for malloc itself is not corrupted by some other means. This is only one aspect of security -- these checks do not, and cannot, detect all possible programming errors. If FOOTERS is defined nonzero, then each allocated chunk carries an additional check word to verify that it was malloced from its space. These check words are the same within each execution of a program using malloc, but differ across executions, so externally crafted fake chunks cannot be freed. This improves security by rejecting frees/reallocs that could corrupt heap memory, in addition to the checks preventing writes to statics that are always on. This may further improve security at the expense of time and space overhead. (Note that FOOTERS may also be worth using with MSPACES.) By default detected errors cause the program to abort (calling "abort()"). You can override this to instead proceed past errors by defining PROCEED_ON_ERROR. In this case, a bad free has no effect, and a malloc that encounters a bad address caused by user overwrites will ignore the bad address by dropping pointers and indices to all known memory. This may be appropriate for programs that should continue if at all possible in the face of programming errors, although they may run out of memory because dropped memory is never reclaimed. If you don't like either of these options, you can define CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything else. And if if you are sure that your program using malloc has no errors or vulnerabilities, you can define INSECURE to 1, which might (or might not) provide a small performance improvement. It is also possible to limit the maximum total allocatable space, using malloc_set_footprint_limit. This is not designed as a security feature in itself (calls to set limits are not screened or privileged), but may be useful as one aspect of a secure implementation. Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero When USE_LOCKS is defined, each public call to malloc, free, etc is surrounded with a lock. By default, this uses a plain pthread mutex, win32 critical section, or a spin-lock if if available for the platform and not disabled by setting USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, recursive versions are used instead (which are not required for base functionality but may be needed in layered extensions). Using a global lock is not especially fast, and can be a major bottleneck. It is designed only to provide minimal protection in concurrent environments, and to provide a basis for extensions. If you are using malloc in a concurrent program, consider instead using nedmalloc (http://www.nedprod.com/programs/portable/nedmalloc/) or ptmalloc (See http://www.malloc.de), which are derived from versions of this malloc. System requirements: Any combination of MORECORE and/or MMAP/MUNMAP This malloc can use unix sbrk or any emulation (invoked using the CALL_MORECORE macro) and/or mmap/munmap or any emulation (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system memory. On most unix systems, it tends to work best if both MORECORE and MMAP are enabled. On Win32, it uses emulations based on VirtualAlloc. It also uses common C library functions like memset. Compliance: I believe it is compliant with the Single Unix Specification (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably others as well. * Overview of algorithms This is not the fastest, most space-conserving, most portable, or most tunable malloc ever written. However it is among the fastest while also being among the most space-conserving, portable and tunable. Consistent balance across these factors results in a good general-purpose allocator for malloc-intensive programs. In most ways, this malloc is a best-fit allocator. Generally, it chooses the best-fitting existing chunk for a request, with ties broken in approximately least-recently-used order. (This strategy normally maintains low fragmentation.) However, for requests less than 256bytes, it deviates from best-fit when there is not an exactly fitting available chunk by preferring to use space adjacent to that used for the previous small request, as well as by breaking ties in approximately most-recently-used order. (These enhance locality of series of small allocations.) And for very large requests (>= 256Kb by default), it relies on system memory mapping facilities, if supported. (This helps avoid carrying around and possibly fragmenting memory used only for large chunks.) All operations (except malloc_stats and mallinfo) have execution times that are bounded by a constant factor of the number of bits in a size_t, not counting any clearing in calloc or copying in realloc, or actions surrounding MORECORE and MMAP that have times proportional to the number of non-contiguous regions returned by system allocation routines, which is often just 1. In real-time applications, you can optionally suppress segment traversals using NO_SEGMENT_TRAVERSAL, which assures bounded execution even when system allocators return non-contiguous spaces, at the typical expense of carrying around more memory and increased fragmentation. The implementation is not very modular and seriously overuses macros. Perhaps someday all C compilers will do as good a job inlining modular code as can now be done by brute-force expansion, but now, enough of them seem not to. Some compilers issue a lot of warnings about code that is dead/unreachable only on some platforms, and also about intentional uses of negation on unsigned types. All known cases of each can be ignored. For a longer but out of date high-level description, see http://gee.cs.oswego.edu/dl/html/malloc.html * MSPACES If MSPACES is defined, then in addition to malloc, free, etc., this file also defines mspace_malloc, mspace_free, etc. These are versions of malloc routines that take an "mspace" argument obtained using create_mspace, to control all internal bookkeeping. If ONLY_MSPACES is defined, only these versions are compiled. So if you would like to use this allocator for only some allocations, and your system malloc for others, you can compile with ONLY_MSPACES and then do something like... static mspace mymspace = create_mspace(0,0); // for example #define mymalloc(bytes) mspace_malloc(mymspace, bytes) (Note: If you only need one instance of an mspace, you can instead use "USE_DL_PREFIX" to relabel the global malloc.) You can similarly create thread-local allocators by storing mspaces as thread-locals. For example: static __thread mspace tlms = 0; void* tlmalloc(size_t bytes) { if (tlms == 0) tlms = create_mspace(0, 0); return mspace_malloc(tlms, bytes); } void tlfree(void* mem) { mspace_free(tlms, mem); } Unless FOOTERS is defined, each mspace is completely independent. You cannot allocate from one and free to another (although conformance is only weakly checked, so usage errors are not always caught). If FOOTERS is defined, then each chunk carries around a tag indicating its originating mspace, and frees are directed to their originating spaces. Normally, this requires use of locks. ------------------------- Compile-time options --------------------------- Be careful in setting #define values for numerical constants of type size_t. On some systems, literal values are not automatically extended to size_t precision unless they are explicitly casted. You can also use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. WIN32 default: defined if _WIN32 defined Defining WIN32 sets up defaults for MS environment and compilers. Otherwise defaults are for unix. Beware that there seem to be some cases where this malloc might not be a pure drop-in replacement for Win32 malloc: Random-looking failures from Win32 GDI API's (eg; SetDIBits()) may be due to bugs in some video driver implementations when pixel buffers are malloc()ed, and the region spans more than one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) default granularity, pixel buffers may straddle virtual allocation regions more often than when using the Microsoft allocator. You can avoid this by using VirtualAlloc() and VirtualFree() for all pixel buffers rather than using malloc(). If this is not possible, recompile this malloc with a larger DEFAULT_GRANULARITY. Note: in cases where MSC and gcc (cygwin) are known to differ on WIN32, conditions use _MSC_VER to distinguish them. DLMALLOC_EXPORT default: extern Defines how public APIs are declared. If you want to export via a Windows DLL, you might define this as #define DLMALLOC_EXPORT extern __declspec(dllexport) If you want a POSIX ELF shared object, you might use #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *)) Controls the minimum alignment for malloc'ed chunks. It must be a power of two and at least 8, even on machines for which smaller alignments would suffice. It may be defined as larger than this though. Note however that code and data structures are optimized for the case of 8-byte alignment. MSPACES default: 0 (false) If true, compile in support for independent allocation spaces. This is only supported if HAVE_MMAP is true. ONLY_MSPACES default: 0 (false) If true, only compile in mspace versions, not regular versions. USE_LOCKS default: 0 (false) Causes each call to each public routine to be surrounded with pthread or WIN32 mutex lock/unlock. (If set true, this can be overridden on a per-mspace basis for mspace versions.) If set to a non-zero value other than 1, locks are used, but their implementation is left out, so lock functions must be supplied manually, as described below. USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available If true, uses custom spin locks for locking. This is currently supported only gcc >= 4.1, older gccs on x86 platforms, and recent MS compilers. Otherwise, posix locks or win32 critical sections are used. USE_RECURSIVE_LOCKS default: not defined If defined nonzero, uses recursive (aka reentrant) locks, otherwise uses plain mutexes. This is not required for malloc proper, but may be needed for layered allocators such as nedmalloc. LOCK_AT_FORK default: not defined If defined nonzero, performs pthread_atfork upon initialization to initialize child lock while holding parent lock. The implementation assumes that pthread locks (not custom locks) are being used. In other cases, you may need to customize the implementation. FOOTERS default: 0 If true, provide extra checking and dispatching by placing information in the footers of allocated chunks. This adds space and time overhead. INSECURE default: 0 If true, omit checks for usage errors and heap space overwrites. USE_DL_PREFIX default: NOT defined Causes compiler to prefix all public routines with the string 'dl'. This can be useful when you only want to use this malloc in one part of a program, using your regular system malloc elsewhere. MALLOC_INSPECT_ALL default: NOT defined If defined, compiles malloc_inspect_all and mspace_inspect_all, that perform traversal of all heap space. Unless access to these functions is otherwise restricted, you probably do not want to include them in secure implementations. ABORT default: defined as abort() Defines how to abort on failed checks. On most systems, a failed check cannot die with an "assert" or even print an informative message, because the underlying print routines in turn call malloc, which will fail again. Generally, the best policy is to simply call abort(). It's not very useful to do more than this because many errors due to overwriting will show up as address faults (null, odd addresses etc) rather than malloc-triggered checks, so will also abort. Also, most compilers know that abort() does not return, so can better optimize code conditionally calling it. PROCEED_ON_ERROR default: defined as 0 (false) Controls whether detected bad addresses cause them to bypassed rather than aborting. If set, detected bad arguments to free and realloc are ignored. And all bookkeeping information is zeroed out upon a detected overwrite of freed heap space, thus losing the ability to ever return it from malloc again, but enabling the application to proceed. If PROCEED_ON_ERROR is defined, the static variable malloc_corruption_error_count is compiled in and can be examined to see if errors have occurred. This option generates slower code than the default abort policy. DEBUG default: NOT defined The DEBUG setting is mainly intended for people trying to modify this code or diagnose problems when porting to new platforms. However, it may also be able to better isolate user errors than just using runtime checks. The assertions in the check routines spell out in more detail the assumptions and invariants underlying the algorithms. The checking is fairly extensive, and will slow down execution noticeably. Calling malloc_stats or mallinfo with DEBUG set will attempt to check every non-mmapped allocated and free chunk in the course of computing the summaries. ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) Debugging assertion failures can be nearly impossible if your version of the assert macro causes malloc to be called, which will lead to a cascade of further failures, blowing the runtime stack. ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), which will usually make debugging easier. MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 The action to take before "return 0" when malloc fails to be able to return memory because there is none available. HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES True if this system supports sbrk or an emulation of it. MORECORE default: sbrk The name of the sbrk-style system routine to call to obtain more memory. See below for guidance on writing custom MORECORE functions. The type of the argument to sbrk/MORECORE varies across systems. It cannot be size_t, because it supports negative arguments, so it is normally the signed type of the same width as size_t (sometimes declared as "intptr_t"). It doesn't much matter though. Internally, we only call it with arguments less than half the max value of a size_t, which should work across all reasonable possibilities, although sometimes generating compiler warnings. MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE If true, take advantage of fact that consecutive calls to MORECORE with positive arguments always return contiguous increasing addresses. This is true of unix sbrk. It does not hurt too much to set it true anyway, since malloc copes with non-contiguities. Setting it false when definitely non-contiguous saves time and possibly wasted space it would take to discover this though. MORECORE_CANNOT_TRIM default: NOT defined True if MORECORE cannot release space back to the system when given negative arguments. This is generally necessary only if you are using a hand-crafted MORECORE function that cannot handle negative arguments. NO_SEGMENT_TRAVERSAL default: 0 If non-zero, suppresses traversals of memory segments returned by either MORECORE or CALL_MMAP. This disables merging of segments that are contiguous, and selectively releasing them to the OS if unused, but bounds execution times. HAVE_MMAP default: 1 (true) True if this system supports mmap or an emulation of it. If so, and HAVE_MORECORE is not true, MMAP is used for all system allocation. If set and HAVE_MORECORE is true as well, MMAP is primarily used to directly allocate very large blocks. It is also used as a backup strategy in cases where MORECORE fails to provide space from system. Note: A single call to MUNMAP is assumed to be able to unmap memory that may have be allocated using multiple calls to MMAP, so long as they are adjacent. HAVE_MREMAP default: 1 on linux, else 0 If true realloc() uses mremap() to re-allocate large blocks and extend or shrink allocation spaces. MMAP_CLEARS default: 1 except on WINCE. True if mmap clears memory so calloc doesn't need to. This is true for standard unix mmap using /dev/zero and on WIN32 except for WINCE. USE_BUILTIN_FFS default: 0 (i.e., not used) Causes malloc to use the builtin ffs() function to compute indices. Some compilers may recognize and intrinsify ffs to be faster than the supplied C version. Also, the case of x86 using gcc is special-cased to an asm instruction, so is already as fast as it can be, and so this setting has no effect. Similarly for Win32 under recent MS compilers. (On most x86s, the asm version is only slightly faster than the C version.) malloc_getpagesize default: derive from system includes, or 4096. The system page size. To the extent possible, this malloc manages memory from the system in page-size units. This may be (and usually is) a function rather than a constant. This is ignored if WIN32, where page size is determined using getSystemInfo during initialization. USE_DEV_RANDOM default: 0 (i.e., not used) Causes malloc to use /dev/random to initialize secure magic seed for stamping footers. Otherwise, the current time is used. NO_MALLINFO default: 0 If defined, don't compile "mallinfo". This can be a simple way of dealing with mismatches between system declarations and those in this file. MALLINFO_FIELD_TYPE default: size_t The type of the fields in the mallinfo struct. This was originally defined as "int" in SVID etc, but is more usefully defined as size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set NO_MALLOC_STATS default: 0 If defined, don't compile "malloc_stats". This avoids calls to fprintf and bringing in stdio dependencies you might not want. REALLOC_ZERO_BYTES_FREES default: not defined This should be set if a call to realloc with zero bytes should be the same as a call to free. Some people think it should. Otherwise, since this malloc returns a unique pointer for malloc(0), so does realloc(p, 0). LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 Define these if your system does not have these header files. You might need to manually insert some of the declarations they provide. DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, system_info.dwAllocationGranularity in WIN32, otherwise 64K. Also settable using mallopt(M_GRANULARITY, x) The unit for allocating and deallocating memory from the system. On most systems with contiguous MORECORE, there is no reason to make this more than a page. However, systems with MMAP tend to either require or encourage larger granularities. You can increase this value to prevent system allocation functions to be called so often, especially if they are slow. The value must be at least one page and must be a power of two. Setting to 0 causes initialization to either page size or win32 region size. (Note: In previous versions of malloc, the equivalent of this option was called "TOP_PAD") DEFAULT_TRIM_THRESHOLD default: 2MB Also settable using mallopt(M_TRIM_THRESHOLD, x) The maximum amount of unused top-most memory to keep before releasing via malloc_trim in free(). Automatic trimming is mainly useful in long-lived programs using contiguous MORECORE. Because trimming via sbrk can be slow on some systems, and can sometimes be wasteful (in cases where programs immediately afterward allocate more large chunks) the value should be high enough so that your overall system performance would improve by releasing this much memory. As a rough guide, you might set to a value close to the average size of a process (program) running on your system. Releasing this much memory would allow such a process to run in memory. Generally, it is worth tuning trim thresholds when a program undergoes phases where several large chunks are allocated and released in ways that can reuse each other's storage, perhaps mixed with phases where there are no such chunks at all. The trim value must be greater than page size to have any useful effect. To disable trimming completely, you can set to MAX_SIZE_T. Note that the trick some people use of mallocing a huge space and then freeing it at program startup, in an attempt to reserve system memory, doesn't have the intended effect under automatic trimming, since that memory will immediately be returned to the system. DEFAULT_MMAP_THRESHOLD default: 256K Also settable using mallopt(M_MMAP_THRESHOLD, x) The request size threshold for using MMAP to directly service a request. Requests of at least this size that cannot be allocated using already-existing space will be serviced via mmap. (If enough normal freed space already exists it is used instead.) Using mmap segregates relatively large chunks of memory so that they can be individually obtained and released from the host system. A request serviced through mmap is never reused by any other request (at least not directly; the system may just so happen to remap successive requests to the same locations). Segregating space in this way has the benefits that: Mmapped space can always be individually released back to the system, which helps keep the system level memory demands of a long-lived program low. Also, mapped memory doesn't become `locked' between other chunks, as can happen with normally allocated chunks, which means that even trimming via malloc_trim would not release them. However, it has the disadvantage that the space cannot be reclaimed, consolidated, and then used to service later requests, as happens with normal chunks. The advantages of mmap nearly always outweigh disadvantages for "large" chunks, but the value of "large" may vary across systems. The default is an empirically derived value that works well in most systems. You can disable mmap by setting to MAX_SIZE_T. MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP The number of consolidated frees between checks to release unused segments when freeing. When using non-contiguous segments, especially with multiple mspaces, checking only for topmost space doesn't always suffice to trigger trimming. To compensate for this, free() will, with a period of MAX_RELEASE_CHECK_RATE (or the current number of segments, if greater) try to release unused segments to the OS when freeing chunks that result in consolidation. The best value for this parameter is a compromise between slowing down frees with relatively costly checks that rarely trigger versus holding on to unused memory. To effectively disable, set to MAX_SIZE_T. This may lead to a very slight speed improvement at the expense of carrying around more memory. */ /* Version identifier to allow people to support multiple versions */ #ifndef DLMALLOC_VERSION #define DLMALLOC_VERSION 20806 #endif /* DLMALLOC_VERSION */ #ifndef DLMALLOC_EXPORT #define DLMALLOC_EXPORT extern #endif #ifndef WIN32 #ifdef _WIN32 #define WIN32 1 #endif /* _WIN32 */ #ifdef _WIN32_WCE #define LACKS_FCNTL_H #define WIN32 1 #endif /* _WIN32_WCE */ #endif /* WIN32 */ #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #include #include #define HAVE_MMAP 1 #define HAVE_MORECORE 0 #define LACKS_UNISTD_H #define LACKS_SYS_PARAM_H #define LACKS_SYS_MMAN_H #define LACKS_STRING_H #define LACKS_STRINGS_H #define LACKS_SYS_TYPES_H #define LACKS_ERRNO_H #define LACKS_SCHED_H #ifndef MALLOC_FAILURE_ACTION #define MALLOC_FAILURE_ACTION #endif /* MALLOC_FAILURE_ACTION */ #ifndef MMAP_CLEARS #ifdef _WIN32_WCE /* WINCE reportedly does not clear */ #define MMAP_CLEARS 0 #else #define MMAP_CLEARS 1 #endif /* _WIN32_WCE */ #endif /*MMAP_CLEARS */ #endif /* WIN32 */ #if defined(DARWIN) || defined(_DARWIN) /* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ #ifndef HAVE_MORECORE #define HAVE_MORECORE 0 #define HAVE_MMAP 1 /* OSX allocators provide 16 byte alignment */ #ifndef MALLOC_ALIGNMENT #define MALLOC_ALIGNMENT ((size_t)16U) #endif #endif /* HAVE_MORECORE */ #endif /* DARWIN */ #ifndef LACKS_SYS_TYPES_H #include /* For size_t */ #endif /* LACKS_SYS_TYPES_H */ /* The maximum possible size_t value has all bits set */ #define MAX_SIZE_T (~(size_t)0) #ifndef USE_LOCKS /* ensure true if spin or recursive locks set */ #define USE_LOCKS (USE_SPIN_LOCKS || USE_RECURSIVE_LOCKS) #endif /* USE_LOCKS */ #if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */ #if ((defined(__GNUC__) && \ ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \ defined(__i386__) || defined(__x86_64__))) || \ (defined(_MSC_VER) && _MSC_VER>=1310)) #ifndef USE_SPIN_LOCKS #define USE_SPIN_LOCKS 1 #endif /* USE_SPIN_LOCKS */ #elif USE_SPIN_LOCKS #error "USE_SPIN_LOCKS defined without implementation" #endif /* ... locks available... */ #elif !defined(USE_SPIN_LOCKS) #define USE_SPIN_LOCKS 0 #endif /* USE_LOCKS */ #ifndef ONLY_MSPACES #define ONLY_MSPACES 0 #endif /* ONLY_MSPACES */ #ifndef MSPACES #if ONLY_MSPACES #define MSPACES 1 #else /* ONLY_MSPACES */ #define MSPACES 0 #endif /* ONLY_MSPACES */ #endif /* MSPACES */ #ifndef MALLOC_ALIGNMENT #define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) #endif /* MALLOC_ALIGNMENT */ #ifndef FOOTERS #define FOOTERS 0 #endif /* FOOTERS */ #ifndef ABORT #define ABORT abort() #endif /* ABORT */ #ifndef ABORT_ON_ASSERT_FAILURE #define ABORT_ON_ASSERT_FAILURE 1 #endif /* ABORT_ON_ASSERT_FAILURE */ #ifndef PROCEED_ON_ERROR #define PROCEED_ON_ERROR 0 #endif /* PROCEED_ON_ERROR */ #ifndef INSECURE #define INSECURE 0 #endif /* INSECURE */ #ifndef MALLOC_INSPECT_ALL #define MALLOC_INSPECT_ALL 0 #endif /* MALLOC_INSPECT_ALL */ #ifndef HAVE_MMAP #define HAVE_MMAP 1 #endif /* HAVE_MMAP */ #ifndef MMAP_CLEARS #define MMAP_CLEARS 1 #endif /* MMAP_CLEARS */ #ifndef HAVE_MREMAP #ifdef linux #define HAVE_MREMAP 1 #ifndef _GNU_SOURCE #define _GNU_SOURCE /* Turns on mremap() definition */ #endif #else /* linux */ #define HAVE_MREMAP 0 #endif /* linux */ #endif /* HAVE_MREMAP */ #ifndef MALLOC_FAILURE_ACTION #define MALLOC_FAILURE_ACTION errno = ENOMEM; #endif /* MALLOC_FAILURE_ACTION */ #ifndef HAVE_MORECORE #if ONLY_MSPACES #define HAVE_MORECORE 0 #else /* ONLY_MSPACES */ #define HAVE_MORECORE 1 #endif /* ONLY_MSPACES */ #endif /* HAVE_MORECORE */ #if !HAVE_MORECORE #define MORECORE_CONTIGUOUS 0 #else /* !HAVE_MORECORE */ #define MORECORE_DEFAULT sbrk #ifndef MORECORE_CONTIGUOUS #define MORECORE_CONTIGUOUS 1 #endif /* MORECORE_CONTIGUOUS */ #endif /* HAVE_MORECORE */ #ifndef DEFAULT_GRANULARITY #if (MORECORE_CONTIGUOUS || defined(WIN32)) #define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ #else /* MORECORE_CONTIGUOUS */ #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) #endif /* MORECORE_CONTIGUOUS */ #endif /* DEFAULT_GRANULARITY */ #ifndef DEFAULT_TRIM_THRESHOLD #ifndef MORECORE_CANNOT_TRIM #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) #else /* MORECORE_CANNOT_TRIM */ #define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T #endif /* MORECORE_CANNOT_TRIM */ #endif /* DEFAULT_TRIM_THRESHOLD */ #ifndef DEFAULT_MMAP_THRESHOLD #if HAVE_MMAP #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) #else /* HAVE_MMAP */ #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T #endif /* HAVE_MMAP */ #endif /* DEFAULT_MMAP_THRESHOLD */ #ifndef MAX_RELEASE_CHECK_RATE #if HAVE_MMAP #define MAX_RELEASE_CHECK_RATE 4095 #else #define MAX_RELEASE_CHECK_RATE MAX_SIZE_T #endif /* HAVE_MMAP */ #endif /* MAX_RELEASE_CHECK_RATE */ #ifndef USE_BUILTIN_FFS #define USE_BUILTIN_FFS 0 #endif /* USE_BUILTIN_FFS */ #ifndef USE_DEV_RANDOM #define USE_DEV_RANDOM 0 #endif /* USE_DEV_RANDOM */ #ifndef NO_MALLINFO #define NO_MALLINFO 0 #endif /* NO_MALLINFO */ #ifndef MALLINFO_FIELD_TYPE #define MALLINFO_FIELD_TYPE size_t #endif /* MALLINFO_FIELD_TYPE */ #ifndef NO_MALLOC_STATS #define NO_MALLOC_STATS 0 #endif /* NO_MALLOC_STATS */ #ifndef NO_SEGMENT_TRAVERSAL #define NO_SEGMENT_TRAVERSAL 0 #endif /* NO_SEGMENT_TRAVERSAL */ /* mallopt tuning options. SVID/XPG defines four standard parameter numbers for mallopt, normally defined in malloc.h. None of these are used in this malloc, so setting them has no effect. But this malloc does support the following options. */ #define M_TRIM_THRESHOLD (-1) #define M_GRANULARITY (-2) #define M_MMAP_THRESHOLD (-3) /* ------------------------ Mallinfo declarations ------------------------ */ #if !NO_MALLINFO /* This version of malloc supports the standard SVID/XPG mallinfo routine that returns a struct containing usage properties and statistics. It should work on any system that has a /usr/include/malloc.h defining struct mallinfo. The main declaration needed is the mallinfo struct that is returned (by-copy) by mallinfo(). The malloinfo struct contains a bunch of fields that are not even meaningful in this version of malloc. These fields are are instead filled by mallinfo() with other numbers that might be of interest. HAVE_USR_INCLUDE_MALLOC_H should be set if you have a /usr/include/malloc.h file that includes a declaration of struct mallinfo. If so, it is included; else a compliant version is declared below. These must be precisely the same for mallinfo() to work. The original SVID version of this struct, defined on most systems with mallinfo, declares all fields as ints. But some others define as unsigned long. If your system defines the fields using a type of different width than listed here, you MUST #include your system version and #define HAVE_USR_INCLUDE_MALLOC_H. */ /* #define HAVE_USR_INCLUDE_MALLOC_H */ #ifdef HAVE_USR_INCLUDE_MALLOC_H #include "/usr/include/malloc.h" #else /* HAVE_USR_INCLUDE_MALLOC_H */ #ifndef STRUCT_MALLINFO_DECLARED /* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */ #define _STRUCT_MALLINFO #define STRUCT_MALLINFO_DECLARED 1 struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* always 0 */ MALLINFO_FIELD_TYPE hblks; /* always 0 */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ }; #endif /* STRUCT_MALLINFO_DECLARED */ #endif /* HAVE_USR_INCLUDE_MALLOC_H */ #endif /* NO_MALLINFO */ /* Try to persuade compilers to inline. The most critical functions for inlining are defined as macros, so these aren't used for them. */ #ifndef FORCEINLINE #if defined(__GNUC__) #define FORCEINLINE __inline __attribute__ ((always_inline)) #elif defined(_MSC_VER) #define FORCEINLINE __forceinline #endif #endif #ifndef NOINLINE #if defined(__GNUC__) #define NOINLINE __attribute__ ((noinline)) #elif defined(_MSC_VER) #define NOINLINE __declspec(noinline) #else #define NOINLINE #endif #endif #ifdef __cplusplus extern "C" { #ifndef FORCEINLINE #define FORCEINLINE inline #endif #endif /* __cplusplus */ #ifndef FORCEINLINE #define FORCEINLINE #endif #if !ONLY_MSPACES /* ------------------- Declarations of public routines ------------------- */ #ifndef USE_DL_PREFIX #define dlcalloc calloc #define dlfree free #define dlmalloc malloc #define dlmemalign memalign #define dlposix_memalign posix_memalign #define dlrealloc realloc #define dlrealloc_in_place realloc_in_place #define dlvalloc valloc #define dlpvalloc pvalloc #define dlmallinfo mallinfo #define dlmallopt mallopt #define dlmalloc_trim malloc_trim #define dlmalloc_stats malloc_stats #define dlmalloc_usable_size malloc_usable_size #define dlmalloc_footprint malloc_footprint #define dlmalloc_max_footprint malloc_max_footprint #define dlmalloc_footprint_limit malloc_footprint_limit #define dlmalloc_set_footprint_limit malloc_set_footprint_limit #define dlmalloc_inspect_all malloc_inspect_all #define dlindependent_calloc independent_calloc #define dlindependent_comalloc independent_comalloc #define dlbulk_free bulk_free #endif /* USE_DL_PREFIX */ /* malloc(size_t n) Returns a pointer to a newly allocated chunk of at least n bytes, or null if no space is available, in which case errno is set to ENOMEM on ANSI C systems. If n is zero, malloc returns a minimum-sized chunk. (The minimum size is 16 bytes on most 32bit systems, and 32 bytes on 64bit systems.) Note that size_t is an unsigned type, so calls with arguments that would be negative if signed are interpreted as requests for huge amounts of space, which will often fail. The maximum supported value of n differs across systems, but is in all cases less than the maximum representable value of a size_t. */ DLMALLOC_EXPORT void* dlmalloc(size_t); /* free(void* p) Releases the chunk of memory pointed to by p, that had been previously allocated using malloc or a related routine such as realloc. It has no effect if p is null. If p was not malloced or already freed, free(p) will by default cause the current program to abort. */ DLMALLOC_EXPORT void dlfree(void*); /* calloc(size_t n_elements, size_t element_size); Returns a pointer to n_elements * element_size bytes, with all locations set to zero. */ DLMALLOC_EXPORT void* dlcalloc(size_t, size_t); /* realloc(void* p, size_t n) Returns a pointer to a chunk of size n that contains the same data as does chunk p up to the minimum of (n, p's size) bytes, or null if no space is available. The returned pointer may or may not be the same as p. The algorithm prefers extending p in most cases when possible, otherwise it employs the equivalent of a malloc-copy-free sequence. If p is null, realloc is equivalent to malloc. If space is not available, realloc returns null, errno is set (if on ANSI) and p is NOT freed. if n is for fewer bytes than already held by p, the newly unused space is lopped off and freed if possible. realloc with a size argument of zero (re)allocates a minimum-sized chunk. The old unix realloc convention of allowing the last-free'd chunk to be used as an argument to realloc is not supported. */ DLMALLOC_EXPORT void* dlrealloc(void*, size_t); /* realloc_in_place(void* p, size_t n) Resizes the space allocated for p to size n, only if this can be done without moving p (i.e., only if there is adjacent space available if n is greater than p's current allocated size, or n is less than or equal to p's size). This may be used instead of plain realloc if an alternative allocation strategy is needed upon failure to expand space; for example, reallocation of a buffer that must be memory-aligned or cleared. You can use realloc_in_place to trigger these alternatives only when needed. Returns p if successful; otherwise null. */ DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); /* memalign(size_t alignment, size_t n); Returns a pointer to a newly allocated chunk of n bytes, aligned in accord with the alignment argument. The alignment argument should be a power of two. If the argument is not a power of two, the nearest greater power is used. 8-byte alignment is guaranteed by normal malloc calls, so don't bother calling memalign with an argument of 8 or less. Overreliance on memalign is a sure way to fragment space. */ DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); /* int posix_memalign(void** pp, size_t alignment, size_t n); Allocates a chunk of n bytes, aligned in accord with the alignment argument. Differs from memalign only in that it (1) assigns the allocated memory to *pp rather than returning it, (2) fails and returns EINVAL if the alignment is not a power of two (3) fails and returns ENOMEM if memory cannot be allocated. */ DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t); /* valloc(size_t n); Equivalent to memalign(pagesize, n), where pagesize is the page size of the system. If the pagesize is unknown, 4096 is used. */ DLMALLOC_EXPORT void* dlvalloc(size_t); /* mallopt(int parameter_number, int parameter_value) Sets tunable parameters The format is to provide a (parameter-number, parameter-value) pair. mallopt then sets the corresponding parameter to the argument value if it can (i.e., so long as the value is meaningful), and returns 1 if successful else 0. To workaround the fact that mallopt is specified to use int, not size_t parameters, the value -1 is specially treated as the maximum unsigned size_t value. SVID/XPG/ANSI defines four standard param numbers for mallopt, normally defined in malloc.h. None of these are use in this malloc, so setting them has no effect. But this malloc also supports other options in mallopt. See below for details. Briefly, supported parameters are as follows (listed defaults are for "typical" configurations). Symbol param # default allowed param values M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) M_GRANULARITY -2 page size any power of 2 >= page size M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) */ DLMALLOC_EXPORT int dlmallopt(int, int); /* malloc_footprint(); Returns the number of bytes obtained from the system. The total number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. */ DLMALLOC_EXPORT size_t dlmalloc_footprint(void); /* malloc_max_footprint(); Returns the maximum number of bytes obtained from the system. This value will be greater than current footprint if deallocated space has been reclaimed by the system. The peak number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. */ DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); /* malloc_footprint_limit(); Returns the number of bytes that the heap is allowed to obtain from the system, returning the last value returned by malloc_set_footprint_limit, or the maximum size_t value if never set. The returned value reflects a permission. There is no guarantee that this number of bytes can actually be obtained from the system. */ DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(); /* malloc_set_footprint_limit(); Sets the maximum number of bytes to obtain from the system, causing failure returns from malloc and related functions upon attempts to exceed this value. The argument value may be subject to page rounding to an enforceable limit; this actual value is returned. Using an argument of the maximum possible size_t effectively disables checks. If the argument is less than or equal to the current malloc_footprint, then all future allocations that require additional system memory will fail. However, invocation cannot retroactively deallocate existing used memory. */ DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); #if MALLOC_INSPECT_ALL /* malloc_inspect_all(void(*handler)(void *start, void *end, size_t used_bytes, void* callback_arg), void* arg); Traverses the heap and calls the given handler for each managed region, skipping all bytes that are (or may be) used for bookkeeping purposes. Traversal does not include include chunks that have been directly memory mapped. Each reported region begins at the start address, and continues up to but not including the end address. The first used_bytes of the region contain allocated data. If used_bytes is zero, the region is unallocated. The handler is invoked with the given callback argument. If locks are defined, they are held during the entire traversal. It is a bad idea to invoke other malloc functions from within the handler. For example, to count the number of in-use chunks with size greater than 1000, you could write: static int count = 0; void count_chunks(void* start, void* end, size_t used, void* arg) { if (used >= 1000) ++count; } then: malloc_inspect_all(count_chunks, NULL); malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. */ DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg); #endif /* MALLOC_INSPECT_ALL */ #if !NO_MALLINFO /* mallinfo() Returns (by copy) a struct containing various summary statistics: arena: current total non-mmapped bytes allocated from system ordblks: the number of free chunks smblks: always zero. hblks: current number of mmapped regions hblkhd: total bytes held in mmapped regions usmblks: the maximum total allocated space. This will be greater than current total if trimming has occurred. fsmblks: always zero uordblks: current total allocated space (normal or mmapped) fordblks: total free space keepcost: the maximum number of bytes that could ideally be released back to system via malloc_trim. ("ideally" means that it ignores page restrictions etc.) Because these fields are ints, but internal bookkeeping may be kept as longs, the reported values may wrap around zero and thus be inaccurate. */ DLMALLOC_EXPORT struct mallinfo dlmallinfo(void); #endif /* NO_MALLINFO */ /* independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); independent_calloc is similar to calloc, but instead of returning a single cleared space, it returns an array of pointers to n_elements independent elements that can hold contents of size elem_size, each of which starts out cleared, and can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null, which is probably the most typical usage). If it is null, the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. It is filled in with the pointers to the chunks. In either case, independent_calloc returns this pointer array, or null if the allocation failed. If n_elements is zero and "chunks" is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be freed when it is no longer needed. This can be done all at once using bulk_free. independent_calloc simplifies and speeds up implementations of many kinds of pools. It may also be useful when constructing large data structures that initially have a fixed number of fixed-sized nodes, but the number is not known at compile time, and some of the nodes may later need to be freed. For example: struct Node { int item; struct Node* next; }; struct Node* build_list() { struct Node** pool; int n = read_number_of_nodes_needed(); if (n <= 0) return 0; pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); if (pool == 0) die(); // organize into a linked list... struct Node* first = pool[0]; for (i = 0; i < n-1; ++i) pool[i]->next = pool[i+1]; free(pool); // Can now free the array (or not, if it is needed later) return first; } */ DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); /* independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); independent_comalloc allocates, all at once, a set of n_elements chunks with sizes indicated in the "sizes" array. It returns an array of pointers to these elements, each of which can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null). If it is null the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. It is filled in with the pointers to the chunks. In either case, independent_comalloc returns this pointer array, or null if the allocation failed. If n_elements is zero and chunks is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be freed when it is no longer needed. This can be done all at once using bulk_free. independent_comallac differs from independent_calloc in that each element may have a different size, and also that it does not automatically clear elements. independent_comalloc can be used to speed up allocation in cases where several structs or objects must always be allocated at the same time. For example: struct Head { ... } struct Foot { ... } void send_message(char* msg) { int msglen = strlen(msg); size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; void* chunks[3]; if (independent_comalloc(3, sizes, chunks) == 0) die(); struct Head* head = (struct Head*)(chunks[0]); char* body = (char*)(chunks[1]); struct Foot* foot = (struct Foot*)(chunks[2]); // ... } In general though, independent_comalloc is worth using only for larger values of n_elements. For small values, you probably won't detect enough difference from series of malloc calls to bother. Overuse of independent_comalloc can increase overall memory usage, since it cannot reuse existing noncontiguous small chunks that might be available for some of the elements. */ DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**); /* bulk_free(void* array[], size_t n_elements) Frees and clears (sets to null) each non-null pointer in the given array. This is likely to be faster than freeing them one-by-one. If footers are used, pointers that have been allocated in different mspaces are not freed or cleared, and the count of all such pointers is returned. For large arrays of pointers with poor locality, it may be worthwhile to sort this array before calling bulk_free. */ DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements); /* pvalloc(size_t n); Equivalent to valloc(minimum-page-that-holds(n)), that is, round up n to nearest pagesize. */ DLMALLOC_EXPORT void* dlpvalloc(size_t); /* malloc_trim(size_t pad); If possible, gives memory back to the system (via negative arguments to sbrk) if there is unused memory at the `high' end of the malloc pool or in unused MMAP segments. You can call this after freeing large blocks of memory to potentially reduce the system-level memory requirements of a program. However, it cannot guarantee to reduce memory. Under some allocation patterns, some large free blocks of memory will be locked between two used chunks, so they cannot be given back to the system. The `pad' argument to malloc_trim represents the amount of free trailing space to leave untrimmed. If this argument is zero, only the minimum amount of memory to maintain internal data structures will be left. Non-zero arguments can be supplied to maintain enough trailing space to service future expected allocations without having to re-obtain memory from the system. Malloc_trim returns 1 if it actually released any memory, else 0. */ DLMALLOC_EXPORT int dlmalloc_trim(size_t); /* malloc_stats(); Prints on stderr the amount of space obtained from the system (both via sbrk and mmap), the maximum amount (which may be more than current if malloc_trim and/or munmap got called), and the current number of bytes allocated via malloc (or realloc, etc) but not yet freed. Note that this is the number of bytes allocated, not the number requested. It will be larger than the number requested because of alignment and bookkeeping overhead. Because it includes alignment wastage as being in use, this figure may be greater than zero even when no user-level chunks are allocated. The reported current and maximum system memory can be inaccurate if a program makes other calls to system memory allocation functions (normally sbrk) outside of malloc. malloc_stats prints only the most commonly interesting statistics. More information can be obtained by calling mallinfo. */ DLMALLOC_EXPORT void dlmalloc_stats(void); /* malloc_usable_size(void* p); Returns the number of bytes you can actually use in an allocated chunk, which may be more than you requested (although often not) due to alignment and minimum size constraints. You can use this many bytes without worrying about overwriting other allocated objects. This is not a particularly great programming practice. malloc_usable_size can be more useful in debugging and assertions, for example: p = malloc(n); assert(malloc_usable_size(p) >= 256); */ size_t dlmalloc_usable_size(void*); #endif /* ONLY_MSPACES */ #if MSPACES /* mspace is an opaque type representing an independent region of space that supports mspace_malloc, etc. */ typedef void* mspace; /* create_mspace creates and returns a new independent space with the given initial capacity, or, if 0, the default granularity size. It returns null if there is no system memory available to create the space. If argument locked is non-zero, the space uses a separate lock to control access. The capacity of the space will grow dynamically as needed to service mspace_malloc requests. You can control the sizes of incremental increases of this space by compiling with a different DEFAULT_GRANULARITY or dynamically setting with mallopt(M_GRANULARITY, value). */ DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); /* destroy_mspace destroys the given space, and attempts to return all of its memory back to the system, returning the total number of bytes freed. After destruction, the results of access to all memory used by the space become undefined. */ DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); /* create_mspace_with_base uses the memory supplied as the initial base of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping, so the capacity must be at least this large. (Otherwise 0 is returned.) When this initial space is exhausted, additional memory will be obtained from the system. Destroying this space will deallocate all additionally allocated space (if possible) but not the initial base. */ DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); /* mspace_track_large_chunks controls whether requests for large chunks are allocated in their own untracked mmapped regions, separate from others in this mspace. By default large chunks are not tracked, which reduces fragmentation. However, such chunks are not necessarily released to the system upon destroy_mspace. Enabling tracking by setting to true may increase fragmentation, but avoids leakage when relying on destroy_mspace to release all memory allocated using this space. The function returns the previous setting. */ DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable); /* mspace_malloc behaves as malloc, but operates within the given space. */ DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes); /* mspace_free behaves as free, but operates within the given space. If compiled with FOOTERS==1, mspace_free is not actually needed. free may be called instead of mspace_free because freed chunks from any space are handled by their originating spaces. */ DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem); /* mspace_realloc behaves as realloc, but operates within the given space. If compiled with FOOTERS==1, mspace_realloc is not actually needed. realloc may be called instead of mspace_realloc because realloced chunks from any space are handled by their originating spaces. */ DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize); /* mspace_calloc behaves as calloc, but operates within the given space. */ DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); /* mspace_memalign behaves as memalign, but operates within the given space. */ DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); /* mspace_independent_calloc behaves as independent_calloc, but operates within the given space. */ DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements, size_t elem_size, void* chunks[]); /* mspace_independent_comalloc behaves as independent_comalloc, but operates within the given space. */ DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements, size_t sizes[], void* chunks[]); /* mspace_footprint() returns the number of bytes obtained from the system for this space. */ DLMALLOC_EXPORT size_t mspace_footprint(mspace msp); /* mspace_max_footprint() returns the peak number of bytes obtained from the system for this space. */ DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp); #if !NO_MALLINFO /* mspace_mallinfo behaves as mallinfo, but reports properties of the given space. */ DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp); #endif /* NO_MALLINFO */ /* malloc_usable_size(void* p) behaves the same as malloc_usable_size; */ DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem); /* mspace_malloc_stats behaves as malloc_stats, but reports properties of the given space. */ DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp); /* mspace_trim behaves as malloc_trim, but operates within the given space. */ DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad); /* An alias for mallopt. */ DLMALLOC_EXPORT int mspace_mallopt(int, int); #endif /* MSPACES */ #ifdef __cplusplus } /* end of extern "C" */ #endif /* __cplusplus */ /* ======================================================================== To make a fully customizable malloc.h header file, cut everything above this line, put into file malloc.h, edit to suit, and #include it on the next line, as well as in programs that use this malloc. ======================================================================== */ /* #include "malloc.h" */ /*------------------------------ internal #includes ---------------------- */ #ifdef _MSC_VER #pragma warning( disable : 4146 ) /* no "unsigned" warnings */ #endif /* _MSC_VER */ #if !NO_MALLOC_STATS #include /* for printing in malloc_stats */ #endif /* NO_MALLOC_STATS */ #ifndef LACKS_ERRNO_H #include /* for MALLOC_FAILURE_ACTION */ #endif /* LACKS_ERRNO_H */ #ifdef DEBUG #if ABORT_ON_ASSERT_FAILURE #undef assert #define assert(x) if(!(x)) ABORT #else /* ABORT_ON_ASSERT_FAILURE */ #include #endif /* ABORT_ON_ASSERT_FAILURE */ #else /* DEBUG */ #ifndef assert #define assert(x) #endif #define DEBUG 0 #endif /* DEBUG */ #if !defined(WIN32) && !defined(LACKS_TIME_H) #include /* for magic initialization */ #endif /* WIN32 */ #ifndef LACKS_STDLIB_H #include /* for abort() */ #endif /* LACKS_STDLIB_H */ #ifndef LACKS_STRING_H #include /* for memset etc */ #endif /* LACKS_STRING_H */ #if USE_BUILTIN_FFS #ifndef LACKS_STRINGS_H #include /* for ffs */ #endif /* LACKS_STRINGS_H */ #endif /* USE_BUILTIN_FFS */ #if HAVE_MMAP #ifndef LACKS_SYS_MMAN_H /* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ #if (defined(linux) && !defined(__USE_GNU)) #define __USE_GNU 1 #include /* for mmap */ #undef __USE_GNU #else #include /* for mmap */ #endif /* linux */ #endif /* LACKS_SYS_MMAN_H */ #ifndef LACKS_FCNTL_H #include #endif /* LACKS_FCNTL_H */ #endif /* HAVE_MMAP */ #ifndef LACKS_UNISTD_H #include /* for sbrk, sysconf */ #else /* LACKS_UNISTD_H */ #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) extern void* sbrk(ptrdiff_t); #endif /* FreeBSD etc */ #endif /* LACKS_UNISTD_H */ /* Declarations for locking */ #if USE_LOCKS #ifndef WIN32 #if defined (__SVR4) && defined (__sun) /* solaris */ #include #elif !defined(LACKS_SCHED_H) #include #endif /* solaris or LACKS_SCHED_H */ #if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS #include #endif /* USE_RECURSIVE_LOCKS ... */ #elif defined(_MSC_VER) #ifndef _M_AMD64 /* These are already defined on AMD64 builds */ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _M_AMD64 */ #pragma intrinsic (_InterlockedCompareExchange) #pragma intrinsic (_InterlockedExchange) #define interlockedcompareexchange _InterlockedCompareExchange #define interlockedexchange _InterlockedExchange #elif defined(WIN32) && defined(__GNUC__) #define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b) #define interlockedexchange __sync_lock_test_and_set #endif /* Win32 */ #else /* USE_LOCKS */ #endif /* USE_LOCKS */ #ifndef LOCK_AT_FORK #define LOCK_AT_FORK 0 #endif /* Declarations for bit scanning on win32 */ #if defined(_MSC_VER) && _MSC_VER>=1300 #ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ unsigned char _BitScanForward(unsigned long *index, unsigned long mask); unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); #ifdef __cplusplus } #endif /* __cplusplus */ #define BitScanForward _BitScanForward #define BitScanReverse _BitScanReverse #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) #endif /* BitScanForward */ #endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ #ifndef WIN32 #ifndef malloc_getpagesize # ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ # ifndef _SC_PAGE_SIZE # define _SC_PAGE_SIZE _SC_PAGESIZE # endif # endif # ifdef _SC_PAGE_SIZE # define malloc_getpagesize sysconf(_SC_PAGE_SIZE) # else # if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) extern size_t getpagesize(); # define malloc_getpagesize getpagesize() # else # ifdef WIN32 /* use supplied emulation of getpagesize */ # define malloc_getpagesize getpagesize() # else # ifndef LACKS_SYS_PARAM_H # include # endif # ifdef EXEC_PAGESIZE # define malloc_getpagesize EXEC_PAGESIZE # else # ifdef NBPG # ifndef CLSIZE # define malloc_getpagesize NBPG # else # define malloc_getpagesize (NBPG * CLSIZE) # endif # else # ifdef NBPC # define malloc_getpagesize NBPC # else # ifdef PAGESIZE # define malloc_getpagesize PAGESIZE # else /* just guess */ # define malloc_getpagesize ((size_t)4096U) # endif # endif # endif # endif # endif # endif # endif #endif #endif /* ------------------- size_t and alignment properties -------------------- */ /* The byte and bit size of a size_t */ #define SIZE_T_SIZE (sizeof(size_t)) #define SIZE_T_BITSIZE (sizeof(size_t) << 3) /* Some constants coerced to size_t */ /* Annoying but necessary to avoid errors on some platforms */ #define SIZE_T_ZERO ((size_t)0) #define SIZE_T_ONE ((size_t)1) #define SIZE_T_TWO ((size_t)2) #define SIZE_T_FOUR ((size_t)4) #define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) #define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) /* The bit mask value corresponding to MALLOC_ALIGNMENT */ #define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) /* True if address a has acceptable alignment */ #define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) /* the number of bytes to offset an address to align it */ #define align_offset(A)\ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) /* -------------------------- MMAP preliminaries ------------------------- */ /* If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and checks to fail so compiler optimizer can delete code rather than using so many "#if"s. */ /* MORECORE and MMAP must return MFAIL on failure */ #define MFAIL ((void*)(MAX_SIZE_T)) #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ #if HAVE_MMAP #ifndef WIN32 #define MUNMAP_DEFAULT(a, s) munmap((a), (s)) #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #define MAP_ANONYMOUS MAP_ANON #endif /* MAP_ANON */ #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) #define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following is unlikely to be needed, but is supplied just in case. */ #define MMAP_FLAGS (MAP_PRIVATE) static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ #define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ (dev_zero_fd = open("/dev/zero", O_RDWR), \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) #else /* WIN32 */ /* Win32 MMAP via VirtualAlloc */ static FORCEINLINE void* win32mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ static FORCEINLINE void* win32direct_mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* This function supports releasing coalesed segments */ static FORCEINLINE int win32munmap(void* ptr, size_t size) { MEMORY_BASIC_INFORMATION minfo; char* cptr = (char*)ptr; while (size) { if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) return -1; if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || minfo.State != MEM_COMMIT || minfo.RegionSize > size) return -1; if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) return -1; cptr += minfo.RegionSize; size -= minfo.RegionSize; } return 0; } #define MMAP_DEFAULT(s) win32mmap(s) #define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) #define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) #endif /* WIN32 */ #endif /* HAVE_MMAP */ #if HAVE_MREMAP #ifndef WIN32 #define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) #endif /* WIN32 */ #endif /* HAVE_MREMAP */ /** * Define CALL_MORECORE */ #if HAVE_MORECORE #ifdef MORECORE #define CALL_MORECORE(S) MORECORE(S) #else /* MORECORE */ #define CALL_MORECORE(S) MORECORE_DEFAULT(S) #endif /* MORECORE */ #else /* HAVE_MORECORE */ #define CALL_MORECORE(S) MFAIL #endif /* HAVE_MORECORE */ /** * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP */ #if HAVE_MMAP #define USE_MMAP_BIT (SIZE_T_ONE) #ifdef MMAP #define CALL_MMAP(s) MMAP(s) #else /* MMAP */ #define CALL_MMAP(s) MMAP_DEFAULT(s) #endif /* MMAP */ #ifdef MUNMAP #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) #else /* MUNMAP */ #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) #endif /* MUNMAP */ #ifdef DIRECT_MMAP #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) #else /* DIRECT_MMAP */ #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) #endif /* DIRECT_MMAP */ #else /* HAVE_MMAP */ #define USE_MMAP_BIT (SIZE_T_ZERO) #define MMAP(s) MFAIL #define MUNMAP(a, s) (-1) #define DIRECT_MMAP(s) MFAIL #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) #define CALL_MMAP(s) MMAP(s) #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) #endif /* HAVE_MMAP */ /** * Define CALL_MREMAP */ #if HAVE_MMAP && HAVE_MREMAP #ifdef MREMAP #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) #else /* MREMAP */ #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) #endif /* MREMAP */ #else /* HAVE_MMAP && HAVE_MREMAP */ #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL #endif /* HAVE_MMAP && HAVE_MREMAP */ /* mstate bit set if continguous morecore disabled or failed */ #define USE_NONCONTIGUOUS_BIT (4U) /* segment bit set in create_mspace_with_base */ #define EXTERN_BIT (8U) /* --------------------------- Lock preliminaries ------------------------ */ /* When locks are defined, there is one global lock, plus one per-mspace lock. The global lock_ensures that mparams.magic and other unique mparams values are initialized only once. It also protects sequences of calls to MORECORE. In many cases sys_alloc requires two calls, that should not be interleaved with calls by other threads. This does not protect against direct calls to MORECORE by other threads not using this lock, so there is still code to cope the best we can on interference. Per-mspace locks surround calls to malloc, free, etc. By default, locks are simple non-reentrant mutexes. Because lock-protected regions generally have bounded times, it is OK to use the supplied simple spinlocks. Spinlocks are likely to improve performance for lightly contended applications, but worsen performance under heavy contention. If USE_LOCKS is > 1, the definitions of lock routines here are bypassed, in which case you will need to define the type MLOCK_T, and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and TRY_LOCK. You must also declare a static MLOCK_T malloc_global_mutex = { initialization values };. */ #if !USE_LOCKS #define USE_LOCK_BIT (0U) #define INITIAL_LOCK(l) (0) #define DESTROY_LOCK(l) (0) #define ACQUIRE_MALLOC_GLOBAL_LOCK() #define RELEASE_MALLOC_GLOBAL_LOCK() #else #if USE_LOCKS > 1 /* ----------------------- User-defined locks ------------------------ */ /* Define your own lock implementation here */ /* #define INITIAL_LOCK(lk) ... */ /* #define DESTROY_LOCK(lk) ... */ /* #define ACQUIRE_LOCK(lk) ... */ /* #define RELEASE_LOCK(lk) ... */ /* #define TRY_LOCK(lk) ... */ /* static MLOCK_T malloc_global_mutex = ... */ #elif USE_SPIN_LOCKS /* First, define CAS_LOCK and CLEAR_LOCK on ints */ /* Note CAS_LOCK defined to return 0 on success */ #if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) #define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1) #define CLEAR_LOCK(sl) __sync_lock_release(sl) #elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) /* Custom spin locks for older gcc on x86 */ static FORCEINLINE int x86_cas_lock(int *sl) { int ret; int val = 1; int cmp = 0; __asm__ __volatile__ ("lock; cmpxchgl %1, %2" : "=a" (ret) : "r" (val), "m" (*(sl)), "0"(cmp) : "memory", "cc"); return ret; } static FORCEINLINE void x86_clear_lock(int* sl) { assert(*sl != 0); int prev = 0; int ret; __asm__ __volatile__ ("lock; xchgl %0, %1" : "=r" (ret) : "m" (*(sl)), "0"(prev) : "memory"); } #define CAS_LOCK(sl) x86_cas_lock(sl) #define CLEAR_LOCK(sl) x86_clear_lock(sl) #else /* Win32 MSC */ #define CAS_LOCK(sl) interlockedexchange(sl, (LONG)1) #define CLEAR_LOCK(sl) interlockedexchange (sl, (LONG)0) #endif /* ... gcc spins locks ... */ /* How to yield for a spin lock */ #define SPINS_PER_YIELD 63 #if defined(_MSC_VER) #define SLEEP_EX_DURATION 50 /* delay for yield/sleep */ #define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE) #elif defined (__SVR4) && defined (__sun) /* solaris */ #define SPIN_LOCK_YIELD thr_yield(); #elif !defined(LACKS_SCHED_H) #define SPIN_LOCK_YIELD sched_yield(); #else #define SPIN_LOCK_YIELD #endif /* ... yield ... */ #if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0 /* Plain spin locks use single word (embedded in malloc_states) */ static int spin_acquire_lock(int *sl) { int spins = 0; while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { if ((++spins & SPINS_PER_YIELD) == 0) { SPIN_LOCK_YIELD; } } return 0; } #define MLOCK_T int #define TRY_LOCK(sl) !CAS_LOCK(sl) #define RELEASE_LOCK(sl) CLEAR_LOCK(sl) #define ACQUIRE_LOCK(sl) (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0) #define INITIAL_LOCK(sl) (*sl = 0) #define DESTROY_LOCK(sl) (0) static MLOCK_T malloc_global_mutex = 0; #else /* USE_RECURSIVE_LOCKS */ /* types for lock owners */ #ifdef WIN32 #define THREAD_ID_T DWORD #define CURRENT_THREAD GetCurrentThreadId() #define EQ_OWNER(X,Y) ((X) == (Y)) #else /* Note: the following assume that pthread_t is a type that can be initialized to (casted) zero. If this is not the case, you will need to somehow redefine these or not use spin locks. */ #define THREAD_ID_T pthread_t #define CURRENT_THREAD pthread_self() #define EQ_OWNER(X,Y) pthread_equal(X, Y) #endif struct malloc_recursive_lock { int sl; unsigned int c; THREAD_ID_T threadid; }; #define MLOCK_T struct malloc_recursive_lock static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0}; static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) { assert(lk->sl != 0); if (--lk->c == 0) { CLEAR_LOCK(&lk->sl); } } static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) { THREAD_ID_T mythreadid = CURRENT_THREAD; int spins = 0; for (;;) { if (*((volatile int *)(&lk->sl)) == 0) { if (!CAS_LOCK(&lk->sl)) { lk->threadid = mythreadid; lk->c = 1; return 0; } } else if (EQ_OWNER(lk->threadid, mythreadid)) { ++lk->c; return 0; } if ((++spins & SPINS_PER_YIELD) == 0) { SPIN_LOCK_YIELD; } } } static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) { THREAD_ID_T mythreadid = CURRENT_THREAD; if (*((volatile int *)(&lk->sl)) == 0) { if (!CAS_LOCK(&lk->sl)) { lk->threadid = mythreadid; lk->c = 1; return 1; } } else if (EQ_OWNER(lk->threadid, mythreadid)) { ++lk->c; return 1; } return 0; } #define RELEASE_LOCK(lk) recursive_release_lock(lk) #define TRY_LOCK(lk) recursive_try_lock(lk) #define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk) #define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0) #define DESTROY_LOCK(lk) (0) #endif /* USE_RECURSIVE_LOCKS */ #elif defined(WIN32) /* Win32 critical sections */ #define MLOCK_T CRITICAL_SECTION #define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0) #define RELEASE_LOCK(lk) LeaveCriticalSection(lk) #define TRY_LOCK(lk) TryEnterCriticalSection(lk) #define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000)) #define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0) #define NEED_GLOBAL_LOCK_INIT static MLOCK_T malloc_global_mutex; static volatile LONG malloc_global_mutex_status; /* Use spin loop to initialize global lock */ static void init_malloc_global_mutex() { for (;;) { long stat = malloc_global_mutex_status; if (stat > 0) return; /* transition to < 0 while initializing, then to > 0) */ if (stat == 0 && interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) { InitializeCriticalSection(&malloc_global_mutex); interlockedexchange(&malloc_global_mutex_status, (LONG)1); return; } SleepEx(0, FALSE); } } #else /* pthreads-based locks */ #define MLOCK_T pthread_mutex_t #define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk) #define RELEASE_LOCK(lk) pthread_mutex_unlock(lk) #define TRY_LOCK(lk) (!pthread_mutex_trylock(lk)) #define INITIAL_LOCK(lk) pthread_init_lock(lk) #define DESTROY_LOCK(lk) pthread_mutex_destroy(lk) #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE) /* Cope with old-style linux recursive lock initialization by adding */ /* skipped internal declaration from pthread.h */ extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, int __kind)); #define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP #define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) #endif /* USE_RECURSIVE_LOCKS ... */ static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; static int pthread_init_lock (MLOCK_T *lk) { pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr)) return 1; #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; #endif if (pthread_mutex_init(lk, &attr)) return 1; if (pthread_mutexattr_destroy(&attr)) return 1; return 0; } #endif /* ... lock types ... */ /* Common code for all lock types */ #define USE_LOCK_BIT (2U) #ifndef ACQUIRE_MALLOC_GLOBAL_LOCK #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); #endif #ifndef RELEASE_MALLOC_GLOBAL_LOCK #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); #endif #endif /* USE_LOCKS */ /* ----------------------- Chunk representations ------------------------ */ /* (The following includes lightly edited explanations by Colin Plumb.) The malloc_chunk declaration below is misleading (but accurate and necessary). It declares a "view" into memory allowing access to necessary fields at known offsets from a given base. Chunks of memory are maintained using a `boundary tag' method as originally described by Knuth. (See the paper by Paul Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such techniques.) Sizes of free chunks are stored both in the front of each chunk and at the end. This makes consolidating fragmented chunks into bigger chunks fast. The head fields also hold bits representing whether chunks are free or in use. Here are some pictures to make it clearer. They are "exploded" to show that the state of a chunk can be thought of as extending from the high 31 bits of the head field of its header through the prev_foot and PINUSE_BIT bit of the following chunk header. A chunk that's in use looks like: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of previous chunk (if P = 0) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| | Size of this chunk 1| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +- -+ | | +- -+ | : +- size - sizeof(size_t) available payload bytes -+ : | chunk-> +- -+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| | Size of next chunk (may or may not be in use) | +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ And if it's free, it looks like this: chunk-> +- -+ | User payload (must be in use, or we would have merged!) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| | Size of this chunk 0| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next pointer | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Prev pointer | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | : +- size - sizeof(struct chunk) unused bytes -+ : | chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of this chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| | Size of next chunk (must be in use, or we would have merged)| +-+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | : +- User payload -+ : | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| +-+ Note that since we always merge adjacent free chunks, the chunks adjacent to a free chunk must be in use. Given a pointer to a chunk (which can be derived trivially from the payload pointer) we can, in O(1) time, find out whether the adjacent chunks are free, and if so, unlink them from the lists that they are on and merge them with the current chunk. Chunks always begin on even word boundaries, so the mem portion (which is returned to the user) is also on an even word boundary, and thus at least double-word aligned. The P (PINUSE_BIT) bit, stored in the unused low-order bit of the chunk size (which is always a multiple of two words), is an in-use bit for the *previous* chunk. If that bit is *clear*, then the word before the current chunk size contains the previous chunk size, and can be used to find the front of the previous chunk. The very first chunk allocated always has this bit set, preventing access to non-existent (or non-owned) memory. If pinuse is set for any given chunk, then you CANNOT determine the size of the previous chunk, and might even get a memory addressing fault when trying to do so. The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of the chunk size redundantly records whether the current chunk is inuse (unless the chunk is mmapped). This redundancy enables usage checks within free and realloc, and reduces indirection when freeing and consolidating chunks. Each freshly allocated chunk must have both cinuse and pinuse set. That is, each allocated chunk borders either a previously allocated and still in-use chunk, or the base of its memory arena. This is ensured by making all allocations from the `lowest' part of any found chunk. Further, no free chunk physically borders another one, so each free chunk is known to be preceded and followed by either inuse chunks or the ends of memory. Note that the `foot' of the current chunk is actually represented as the prev_foot of the NEXT chunk. This makes it easier to deal with alignments etc but can be very confusing when trying to extend or adapt this code. The exceptions to all this are 1. The special chunk `top' is the top-most available chunk (i.e., the one bordering the end of available memory). It is treated specially. Top is never included in any bin, is used only if no other chunk is available, and is released back to the system if it is very large (see M_TRIM_THRESHOLD). In effect, the top chunk is treated as larger (and thus less well fitting) than any other available chunk. The top chunk doesn't update its trailing size field since there is no next contiguous chunk that would have to index off it. However, space is still allocated for it (TOP_FOOT_SIZE) to enable separation or merging when space is extended. 3. Chunks allocated via mmap, have both cinuse and pinuse bits cleared in their head fields. Because they are allocated one-by-one, each must carry its own prev_foot field, which is also used to hold the offset this chunk has within its mmapped region, which is needed to preserve alignment. Each mmapped chunk is trailed by the first two fields of a fake next-chunk for sake of usage checks. */ struct malloc_chunk { size_t prev_foot; /* Size of previous chunk (if free). */ size_t head; /* Size and inuse bits. */ struct malloc_chunk* fd; /* double links -- used only if free. */ struct malloc_chunk* bk; }; typedef struct malloc_chunk mchunk; typedef struct malloc_chunk* mchunkptr; typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ typedef unsigned int bindex_t; /* Described below */ typedef unsigned int binmap_t; /* Described below */ typedef unsigned int flag_t; /* The type of various bit flag sets */ /* ------------------- Chunks sizes and alignments ----------------------- */ #define MCHUNK_SIZE (sizeof(mchunk)) #if FOOTERS #define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) #else /* FOOTERS */ #define CHUNK_OVERHEAD (SIZE_T_SIZE) #endif /* FOOTERS */ /* MMapped chunks need a second word of overhead ... */ #define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) /* ... and additional padding for fake next-chunk at foot */ #define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) /* The smallest size we can malloc is an aligned minimal chunk */ #define MIN_CHUNK_SIZE\ ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) /* conversion from malloc headers to user pointers, and back */ #define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) #define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) /* chunk associated with aligned address A */ #define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) /* Bounds on request (not chunk) sizes. */ #define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) #define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) /* pad request bytes into a usable size */ #define pad_request(req) \ (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) /* pad request, checking for minimum (but not maximum) */ #define request2size(req) \ (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) /* ------------------ Operations on head and foot fields ----------------- */ /* The head field of a chunk is or'ed with PINUSE_BIT when previous adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in use, unless mmapped, in which case both bits are cleared. FLAG4_BIT is not used by this malloc, but might be useful in extensions. */ #define PINUSE_BIT (SIZE_T_ONE) #define CINUSE_BIT (SIZE_T_TWO) #define FLAG4_BIT (SIZE_T_FOUR) #define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) #define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) /* Head value for fenceposts */ #define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) /* extraction of fields from head words */ #define cinuse(p) ((p)->head & CINUSE_BIT) #define pinuse(p) ((p)->head & PINUSE_BIT) #define flag4inuse(p) ((p)->head & FLAG4_BIT) #define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) #define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) #define chunksize(p) ((p)->head & ~(FLAG_BITS)) #define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) #define set_flag4(p) ((p)->head |= FLAG4_BIT) #define clear_flag4(p) ((p)->head &= ~FLAG4_BIT) /* Treat space at ptr +/- offset as a chunk */ #define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) #define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) /* Ptr to next or previous physical malloc_chunk. */ #define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) #define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) /* extract next chunk's pinuse bit */ #define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) /* Get/set size at footer */ #define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) #define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) /* Set size, pinuse bit, and foot */ #define set_size_and_pinuse_of_free_chunk(p, s)\ ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) /* Set size, pinuse bit, foot, and clear next pinuse */ #define set_free_with_pinuse(p, s, n)\ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) /* Get the internal overhead associated with chunk p */ #define overhead_for(p)\ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) /* Return true if malloced space is not necessarily cleared */ #if MMAP_CLEARS #define calloc_must_clear(p) (!is_mmapped(p)) #else /* MMAP_CLEARS */ #define calloc_must_clear(p) (1) #endif /* MMAP_CLEARS */ /* ---------------------- Overlaid data structures ----------------------- */ /* When chunks are not in use, they are treated as nodes of either lists or trees. "Small" chunks are stored in circular doubly-linked lists, and look like this: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of previous chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `head:' | Size of chunk, in bytes |P| mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Forward pointer to next chunk in list | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Back pointer to previous chunk in list | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Unused space (may be 0 bytes long) . . . . | nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `foot:' | Size of chunk, in bytes | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Larger chunks are kept in a form of bitwise digital trees (aka tries) keyed on chunksizes. Because malloc_tree_chunks are only for free chunks greater than 256 bytes, their size doesn't impose any constraints on user chunk sizes. Each node looks like: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Size of previous chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `head:' | Size of chunk, in bytes |P| mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Forward pointer to next chunk of same size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Back pointer to previous chunk of same size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to left child (child[0]) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to right child (child[1]) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Pointer to parent | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | bin index of this chunk | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Unused space . . | nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ `foot:' | Size of chunk, in bytes | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Each tree holding treenodes is a tree of unique chunk sizes. Chunks of the same size are arranged in a circularly-linked list, with only the oldest chunk (the next to be used, in our FIFO ordering) actually in the tree. (Tree members are distinguished by a non-null parent pointer.) If a chunk with the same size an an existing node is inserted, it is linked off the existing node using pointers that work in the same way as fd/bk pointers of small chunks. Each tree contains a power of 2 sized range of chunk sizes (the smallest is 0x100 <= x < 0x180), which is is divided in half at each tree level, with the chunks in the smaller half of the range (0x100 <= x < 0x140 for the top nose) in the left subtree and the larger half (0x140 <= x < 0x180) in the right subtree. This is, of course, done by inspecting individual bits. Using these rules, each node's left subtree contains all smaller sizes than its right subtree. However, the node at the root of each subtree has no particular ordering relationship to either. (The dividing line between the subtree sizes is based on trie relation.) If we remove the last chunk of a given size from the interior of the tree, we need to replace it with a leaf node. The tree ordering rules permit a node to be replaced by any leaf below it. The smallest chunk in a tree (a common operation in a best-fit allocator) can be found by walking a path to the leftmost leaf in the tree. Unlike a usual binary tree, where we follow left child pointers until we reach a null, here we follow the right child pointer any time the left one is null, until we reach a leaf with both child pointers null. The smallest chunk in the tree will be somewhere along that path. The worst case number of steps to add, find, or remove a node is bounded by the number of bits differentiating chunks within bins. Under current bin calculations, this ranges from 6 up to 21 (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case is of course much better. */ struct malloc_tree_chunk { /* The first four fields must be compatible with malloc_chunk */ size_t prev_foot; size_t head; struct malloc_tree_chunk* fd; struct malloc_tree_chunk* bk; struct malloc_tree_chunk* child[2]; struct malloc_tree_chunk* parent; bindex_t index; }; typedef struct malloc_tree_chunk tchunk; typedef struct malloc_tree_chunk* tchunkptr; typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ /* A little helper macro for trees */ #define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) /* ----------------------------- Segments -------------------------------- */ /* Each malloc space may include non-contiguous segments, held in a list headed by an embedded malloc_segment record representing the top-most space. Segments also include flags holding properties of the space. Large chunks that are directly allocated by mmap are not included in this list. They are instead independently created and destroyed without otherwise keeping track of them. Segment management mainly comes into play for spaces allocated by MMAP. Any call to MMAP might or might not return memory that is adjacent to an existing segment. MORECORE normally contiguously extends the current space, so this space is almost always adjacent, which is simpler and faster to deal with. (This is why MORECORE is used preferentially to MMAP when both are available -- see sys_alloc.) When allocating using MMAP, we don't use any of the hinting mechanisms (inconsistently) supported in various implementations of unix mmap, or distinguish reserving from committing memory. Instead, we just ask for space, and exploit contiguity when we get it. It is probably possible to do better than this on some systems, but no general scheme seems to be significantly better. Management entails a simpler variant of the consolidation scheme used for chunks to reduce fragmentation -- new adjacent memory is normally prepended or appended to an existing segment. However, there are limitations compared to chunk consolidation that mostly reflect the fact that segment processing is relatively infrequent (occurring only when getting memory from system) and that we don't expect to have huge numbers of segments: * Segments are not indexed, so traversal requires linear scans. (It would be possible to index these, but is not worth the extra overhead and complexity for most programs on most platforms.) * New segments are only appended to old ones when holding top-most memory; if they cannot be prepended to others, they are held in different segments. Except for the top-most segment of an mstate, each segment record is kept at the tail of its segment. Segments are added by pushing segment records onto the list headed by &mstate.seg for the containing mstate. Segment flags control allocation/merge/deallocation policies: * If EXTERN_BIT set, then we did not allocate this segment, and so should not try to deallocate or merge with others. (This currently holds only for the initial segment passed into create_mspace_with_base.) * If USE_MMAP_BIT set, the segment may be merged with other surrounding mmapped segments and trimmed/de-allocated using munmap. * If neither bit is set, then the segment was obtained using MORECORE so can be merged with surrounding MORECORE'd segments and deallocated/trimmed using MORECORE with negative arguments. */ struct malloc_segment { char* base; /* base address */ size_t size; /* allocated size */ struct malloc_segment* next; /* ptr to next segment */ flag_t sflags; /* mmap and extern flag */ }; #define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) #define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) typedef struct malloc_segment msegment; typedef struct malloc_segment* msegmentptr; /* ---------------------------- malloc_state ----------------------------- */ /* A malloc_state holds all of the bookkeeping for a space. The main fields are: Top The topmost chunk of the currently active segment. Its size is cached in topsize. The actual size of topmost space is topsize+TOP_FOOT_SIZE, which includes space reserved for adding fenceposts and segment records if necessary when getting more space from the system. The size at which to autotrim top is cached from mparams in trim_check, except that it is disabled if an autotrim fails. Designated victim (dv) This is the preferred chunk for servicing small requests that don't have exact fits. It is normally the chunk split off most recently to service another small request. Its size is cached in dvsize. The link fields of this chunk are not maintained since it is not kept in a bin. SmallBins An array of bin headers for free chunks. These bins hold chunks with sizes less than MIN_LARGE_SIZE bytes. Each bin contains chunks of all the same size, spaced 8 bytes apart. To simplify use in double-linked lists, each bin header acts as a malloc_chunk pointing to the real first node, if it exists (else pointing to itself). This avoids special-casing for headers. But to avoid waste, we allocate only the fd/bk pointers of bins, and then use repositioning tricks to treat these as the fields of a chunk. TreeBins Treebins are pointers to the roots of trees holding a range of sizes. There are 2 equally spaced treebins for each power of two from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything larger. Bin maps There is one bit map for small bins ("smallmap") and one for treebins ("treemap). Each bin sets its bit when non-empty, and clears the bit when empty. Bit operations are then used to avoid bin-by-bin searching -- nearly all "search" is done without ever looking at bins that won't be selected. The bit maps conservatively use 32 bits per map word, even if on 64bit system. For a good description of some of the bit-based techniques used here, see Henry S. Warren Jr's book "Hacker's Delight" (and supplement at http://hackersdelight.org/). Many of these are intended to reduce the branchiness of paths through malloc etc, as well as to reduce the number of memory locations read or written. Segments A list of segments headed by an embedded malloc_segment record representing the initial space. Address check support The least_addr field is the least address ever obtained from MORECORE or MMAP. Attempted frees and reallocs of any address less than this are trapped (unless INSECURE is defined). Magic tag A cross-check field that should always hold same value as mparams.magic. Max allowed footprint The maximum allowed bytes to allocate from system (zero means no limit) Flags Bits recording whether to use MMAP, locks, or contiguous MORECORE Statistics Each space keeps track of current and maximum system memory obtained via MORECORE or MMAP. Trim support Fields holding the amount of unused topmost memory that should trigger trimming, and a counter to force periodic scanning to release unused non-topmost segments. Locking If USE_LOCKS is defined, the "mutex" lock is acquired and released around every public call using this mspace. Extension support A void* pointer and a size_t field that can be used to help implement extensions to this malloc. */ /* Bin types, widths and sizes */ #define NSMALLBINS (32U) #define NTREEBINS (32U) #define SMALLBIN_SHIFT (3U) #define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) #define TREEBIN_SHIFT (8U) #define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) #define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) #define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) struct malloc_state { binmap_t smallmap; binmap_t treemap; size_t dvsize; size_t topsize; char* least_addr; mchunkptr dv; mchunkptr top; size_t trim_check; size_t release_checks; size_t magic; mchunkptr smallbins[(NSMALLBINS+1)*2]; tbinptr treebins[NTREEBINS]; size_t footprint; size_t max_footprint; size_t footprint_limit; /* zero means no limit */ flag_t mflags; #if USE_LOCKS MLOCK_T mutex; /* locate lock among fields that rarely change */ #endif /* USE_LOCKS */ msegment seg; void* extp; /* Unused but available for extensions */ size_t exts; }; typedef struct malloc_state* mstate; /* ------------- Global malloc_state and malloc_params ------------------- */ /* malloc_params holds global properties, including those that can be dynamically set using mallopt. There is a single instance, mparams, initialized in init_mparams. Note that the non-zeroness of "magic" also serves as an initialization flag. */ struct malloc_params { size_t magic; size_t page_size; size_t granularity; size_t mmap_threshold; size_t trim_threshold; flag_t default_mflags; }; static struct malloc_params mparams; /* Ensure mparams initialized */ #define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) #if !ONLY_MSPACES /* The global malloc_state used for all non-"mspace" calls */ static struct malloc_state _gm_; #define gm (&_gm_) #define is_global(M) ((M) == &_gm_) #endif /* !ONLY_MSPACES */ #define is_initialized(M) ((M)->top != 0) /* -------------------------- system alloc setup ------------------------- */ /* Operations on mflags */ #define use_lock(M) ((M)->mflags & USE_LOCK_BIT) #define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) #if USE_LOCKS #define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) #else #define disable_lock(M) #endif #define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) #define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) #if HAVE_MMAP #define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) #else #define disable_mmap(M) #endif #define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) #define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) #define set_lock(M,L)\ ((M)->mflags = (L)?\ ((M)->mflags | USE_LOCK_BIT) :\ ((M)->mflags & ~USE_LOCK_BIT)) /* page-align a size */ #define page_align(S)\ (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) /* granularity-align a size */ #define granularity_align(S)\ (((S) + (mparams.granularity - SIZE_T_ONE))\ & ~(mparams.granularity - SIZE_T_ONE)) /* For mmap, use granularity alignment on windows, else page-align */ #ifdef WIN32 #define mmap_align(S) granularity_align(S) #else #define mmap_align(S) page_align(S) #endif /* For sys_alloc, enough padding to ensure can malloc request on success */ #define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) #define is_page_aligned(S)\ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) #define is_granularity_aligned(S)\ (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) /* True if segment S holds address A */ #define segment_holds(S, A)\ ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) /* Return segment holding given address */ static msegmentptr segment_holding(mstate m, char* addr) { msegmentptr sp = &m->seg; for (;;) { if (addr >= sp->base && addr < sp->base + sp->size) return sp; if ((sp = sp->next) == 0) return 0; } return 0; } /* Return true if segment contains a segment link */ static int has_segment_link(mstate m, msegmentptr ss) { msegmentptr sp = &m->seg; for (;;) { if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) return 1; if ((sp = sp->next) == 0) return 0; } return 0; } #ifndef MORECORE_CANNOT_TRIM #define should_trim(M,s) ((s) > (M)->trim_check) #else /* MORECORE_CANNOT_TRIM */ #define should_trim(M,s) (0) #endif /* MORECORE_CANNOT_TRIM */ /* TOP_FOOT_SIZE is padding at the end of a segment, including space that may be needed to place segment records and fenceposts when new noncontiguous segments are added. */ #define TOP_FOOT_SIZE\ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) /* ------------------------------- Hooks -------------------------------- */ /* PREACTION should be defined to return 0 on success, and nonzero on failure. If you are not using locking, you can redefine these to do anything you like. */ #if USE_LOCKS #define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) #define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } #else /* USE_LOCKS */ #ifndef PREACTION #define PREACTION(M) (0) #endif /* PREACTION */ #ifndef POSTACTION #define POSTACTION(M) #endif /* POSTACTION */ #endif /* USE_LOCKS */ /* CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. USAGE_ERROR_ACTION is triggered on detected bad frees and reallocs. The argument p is an address that might have triggered the fault. It is ignored by the two predefined actions, but might be useful in custom actions that try to help diagnose errors. */ #if PROCEED_ON_ERROR /* A count of the number of corruption errors causing resets */ int malloc_corruption_error_count; /* default corruption action */ static void reset_on_error(mstate m); #define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) #define USAGE_ERROR_ACTION(m, p) #else /* PROCEED_ON_ERROR */ #ifndef CORRUPTION_ERROR_ACTION #define CORRUPTION_ERROR_ACTION(m) ABORT #endif /* CORRUPTION_ERROR_ACTION */ #ifndef USAGE_ERROR_ACTION #define USAGE_ERROR_ACTION(m,p) ABORT #endif /* USAGE_ERROR_ACTION */ #endif /* PROCEED_ON_ERROR */ /* -------------------------- Debugging setup ---------------------------- */ #if ! DEBUG #define check_free_chunk(M,P) #define check_inuse_chunk(M,P) #define check_malloced_chunk(M,P,N) #define check_mmapped_chunk(M,P) #define check_malloc_state(M) #define check_top_chunk(M,P) #else /* DEBUG */ #define check_free_chunk(M,P) do_check_free_chunk(M,P) #define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) #define check_top_chunk(M,P) do_check_top_chunk(M,P) #define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) #define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) #define check_malloc_state(M) do_check_malloc_state(M) static void do_check_any_chunk(mstate m, mchunkptr p); static void do_check_top_chunk(mstate m, mchunkptr p); static void do_check_mmapped_chunk(mstate m, mchunkptr p); static void do_check_inuse_chunk(mstate m, mchunkptr p); static void do_check_free_chunk(mstate m, mchunkptr p); static void do_check_malloced_chunk(mstate m, void* mem, size_t s); static void do_check_tree(mstate m, tchunkptr t); static void do_check_treebin(mstate m, bindex_t i); static void do_check_smallbin(mstate m, bindex_t i); static void do_check_malloc_state(mstate m); static int bin_find(mstate m, mchunkptr x); static size_t traverse_and_check(mstate m); #endif /* DEBUG */ /* ---------------------------- Indexing Bins ---------------------------- */ #define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) #define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) #define small_index2size(i) ((i) << SMALLBIN_SHIFT) #define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) /* addressing by index. See above about smallbin repositioning */ #define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) #define treebin_at(M,i) (&((M)->treebins[i])) /* assign tree index for size S to variable I. Use x86 asm if possible */ #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define compute_tree_index(S, I)\ {\ unsigned int X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K = (unsigned) sizeof(X)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X); \ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #elif defined (__INTEL_COMPILER) #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K = _bit_scan_reverse (X); \ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #elif defined(_MSC_VER) && _MSC_VER>=1300 #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int K;\ _BitScanReverse((DWORD *) &K, (DWORD) X);\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } #else /* GNUC */ #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ if (X == 0)\ I = 0;\ else if (X > 0xFFFF)\ I = NTREEBINS-1;\ else {\ unsigned int Y = (unsigned int)X;\ unsigned int N = ((Y - 0x100) >> 16) & 8;\ unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ N += K;\ N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ K = 14 - N + ((Y <<= K) >> 15);\ I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ }\ } #endif /* GNUC */ /* Bit representing maximum resolved size in a treebin at i */ #define bit_for_tree_index(i) \ (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) /* Shift placing maximum resolved bit in a treebin at i as sign bit */ #define leftshift_for_tree_index(i) \ ((i == NTREEBINS-1)? 0 : \ ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) /* The size of the smallest chunk held in bin with index i */ #define minsize_for_tree_index(i) \ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) /* ------------------------ Operations on bin maps ----------------------- */ /* bit corresponding to given index */ #define idx2bit(i) ((binmap_t)(1) << (i)) /* Mark/Clear bits with given index */ #define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) #define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) #define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) #define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) #define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) #define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) /* isolate the least set bit of a bitmap */ #define least_bit(x) ((x) & -(x)) /* mask with all bits to left of least bit of x on */ #define left_bits(x) ((x<<1) | -(x<<1)) /* mask with all bits to left of or equal to least bit of x on */ #define same_or_left_bits(x) ((x) | -(x)) /* index corresponding to given bit. Use x86 asm if possible */ #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define compute_bit2idx(X, I)\ {\ unsigned int J;\ J = __builtin_ctz(X); \ I = (bindex_t)J;\ } #elif defined (__INTEL_COMPILER) #define compute_bit2idx(X, I)\ {\ unsigned int J;\ J = _bit_scan_forward (X); \ I = (bindex_t)J;\ } #elif defined(_MSC_VER) && _MSC_VER>=1300 #define compute_bit2idx(X, I)\ {\ unsigned int J;\ _BitScanForward((DWORD *) &J, X);\ I = (bindex_t)J;\ } #elif USE_BUILTIN_FFS #define compute_bit2idx(X, I) I = ffs(X)-1 #else #define compute_bit2idx(X, I)\ {\ unsigned int Y = X - 1;\ unsigned int K = Y >> (16-4) & 16;\ unsigned int N = K; Y >>= K;\ N += K = Y >> (8-3) & 8; Y >>= K;\ N += K = Y >> (4-2) & 4; Y >>= K;\ N += K = Y >> (2-1) & 2; Y >>= K;\ N += K = Y >> (1-0) & 1; Y >>= K;\ I = (bindex_t)(N + Y);\ } #endif /* GNUC */ /* ----------------------- Runtime Check Support ------------------------- */ /* For security, the main invariant is that malloc/free/etc never writes to a static address other than malloc_state, unless static malloc_state itself has been corrupted, which cannot occur via malloc (because of these checks). In essence this means that we believe all pointers, sizes, maps etc held in malloc_state, but check all of those linked or offsetted from other embedded data structures. These checks are interspersed with main code in a way that tends to minimize their run-time cost. When FOOTERS is defined, in addition to range checking, we also verify footer fields of inuse chunks, which can be used guarantee that the mstate controlling malloc/free is intact. This is a streamlined version of the approach described by William Robertson et al in "Run-time Detection of Heap-based Overflows" LISA'03 http://www.usenix.org/events/lisa03/tech/robertson.html The footer of an inuse chunk holds the xor of its mstate and a random seed, that is checked upon calls to free() and realloc(). This is (probabalistically) unguessable from outside the program, but can be computed by any code successfully malloc'ing any chunk, so does not itself provide protection against code that has already broken security through some other means. Unlike Robertson et al, we always dynamically check addresses of all offset chunks (previous, next, etc). This turns out to be cheaper than relying on hashes. */ #if !INSECURE /* Check if address a is at least as high as any from MORECORE or MMAP */ #define ok_address(M, a) ((char*)(a) >= (M)->least_addr) /* Check if address of next chunk n is higher than base chunk p */ #define ok_next(p, n) ((char*)(p) < (char*)(n)) /* Check if p has inuse status */ #define ok_inuse(p) is_inuse(p) /* Check if p has its pinuse bit on */ #define ok_pinuse(p) pinuse(p) #else /* !INSECURE */ #define ok_address(M, a) (1) #define ok_next(b, n) (1) #define ok_inuse(p) (1) #define ok_pinuse(p) (1) #endif /* !INSECURE */ #if (FOOTERS && !INSECURE) /* Check if (alleged) mstate m has expected magic field */ #define ok_magic(M) ((M)->magic == mparams.magic) #else /* (FOOTERS && !INSECURE) */ #define ok_magic(M) (1) #endif /* (FOOTERS && !INSECURE) */ /* In gcc, use __builtin_expect to minimize impact of checks */ #if !INSECURE #if defined(__GNUC__) && __GNUC__ >= 3 #define RTCHECK(e) __builtin_expect(e, 1) #else /* GNUC */ #define RTCHECK(e) (e) #endif /* GNUC */ #else /* !INSECURE */ #define RTCHECK(e) (1) #endif /* !INSECURE */ /* macros to set up inuse chunks with or without footers */ #if !FOOTERS #define mark_inuse_foot(M,p,s) /* Macros for setting head/foot of non-mmapped chunks */ /* Set cinuse bit and pinuse bit of next chunk */ #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) /* Set cinuse and pinuse of this chunk and pinuse of next chunk */ #define set_inuse_and_pinuse(M,p,s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) /* Set size, cinuse and pinuse bit of this chunk */ #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) #else /* FOOTERS */ /* Set foot of inuse chunk to be xor of mstate and seed */ #define mark_inuse_foot(M,p,s)\ (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) #define get_mstate_for(p)\ ((mstate)(((mchunkptr)((char*)(p) +\ (chunksize(p))))->prev_foot ^ mparams.magic)) #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ mark_inuse_foot(M,p,s)) #define set_inuse_and_pinuse(M,p,s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ mark_inuse_foot(M,p,s)) #define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ mark_inuse_foot(M, p, s)) #endif /* !FOOTERS */ /* ---------------------------- setting mparams -------------------------- */ #if LOCK_AT_FORK static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); } static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } static void post_fork_child(void) { INITIAL_LOCK(&(gm)->mutex); } #endif /* LOCK_AT_FORK */ /* Initialize mparams */ static int init_mparams(void) { #ifdef NEED_GLOBAL_LOCK_INIT if (malloc_global_mutex_status <= 0) init_malloc_global_mutex(); #endif ACQUIRE_MALLOC_GLOBAL_LOCK(); if (mparams.magic == 0) { size_t magic; size_t psize; size_t gsize; #ifndef WIN32 psize = malloc_getpagesize; gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); #else /* WIN32 */ { SYSTEM_INFO system_info; GetSystemInfo(&system_info); psize = system_info.dwPageSize; gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); } #endif /* WIN32 */ /* Sanity-check configuration: size_t must be unsigned and as wide as pointer type. ints must be at least 4 bytes. alignment must be at least 8. Alignment, min chunk size, and page size must all be powers of 2. */ if ((sizeof(size_t) != sizeof(char*)) || (MAX_SIZE_T < MIN_CHUNK_SIZE) || (sizeof(int) < 4) || (MALLOC_ALIGNMENT < (size_t)8U) || ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || ((gsize & (gsize-SIZE_T_ONE)) != 0) || ((psize & (psize-SIZE_T_ONE)) != 0)) ABORT; mparams.granularity = gsize; mparams.page_size = psize; mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; #if MORECORE_CONTIGUOUS mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; #else /* MORECORE_CONTIGUOUS */ mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; #endif /* MORECORE_CONTIGUOUS */ #if !ONLY_MSPACES /* Set up lock for main malloc area */ gm->mflags = mparams.default_mflags; (void)INITIAL_LOCK(&gm->mutex); #endif #if LOCK_AT_FORK pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child); #endif { #if USE_DEV_RANDOM int fd; unsigned char buf[sizeof(size_t)]; /* Try to use /dev/urandom, else fall back on using time */ if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && read(fd, buf, sizeof(buf)) == sizeof(buf)) { magic = *((size_t *) buf); close(fd); } else #endif /* USE_DEV_RANDOM */ #ifdef WIN32 magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); #elif defined(LACKS_TIME_H) magic = (size_t)&magic ^ (size_t)0x55555555U; #else magic = (size_t)(time(0) ^ (size_t)0x55555555U); #endif magic |= (size_t)8U; /* ensure nonzero */ magic &= ~(size_t)7U; /* improve chances of fault for bad values */ /* Until memory modes commonly available, use volatile-write */ (*(volatile size_t *)(&(mparams.magic))) = magic; } } RELEASE_MALLOC_GLOBAL_LOCK(); return 1; } /* support for mallopt */ static int change_mparam(int param_number, int value) { size_t val; ensure_initialization(); val = (value == -1)? MAX_SIZE_T : (size_t)value; switch(param_number) { case M_TRIM_THRESHOLD: mparams.trim_threshold = val; return 1; case M_GRANULARITY: if (val >= mparams.page_size && ((val & (val-1)) == 0)) { mparams.granularity = val; return 1; } else return 0; case M_MMAP_THRESHOLD: mparams.mmap_threshold = val; return 1; default: return 0; } } #if DEBUG /* ------------------------- Debugging Support --------------------------- */ /* Check properties of any chunk, whether free, inuse, mmapped etc */ static void do_check_any_chunk(mstate m, mchunkptr p) { assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); } /* Check properties of top chunk */ static void do_check_top_chunk(mstate m, mchunkptr p) { msegmentptr sp = segment_holding(m, (char*)p); size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */ assert(sp != 0); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); assert(sz == m->topsize); assert(sz > 0); assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); assert(pinuse(p)); assert(!pinuse(chunk_plus_offset(p, sz))); } /* Check properties of (inuse) mmapped chunks */ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); assert(is_mmapped(p)); assert(use_mmap(m)); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); assert(!is_small(sz)); assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); } /* Check properties of inuse chunks */ static void do_check_inuse_chunk(mstate m, mchunkptr p) { do_check_any_chunk(m, p); assert(is_inuse(p)); assert(next_pinuse(p)); /* If not pinuse and not mmapped, previous chunk has OK offset */ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); if (is_mmapped(p)) do_check_mmapped_chunk(m, p); } /* Check properties of free chunks */ static void do_check_free_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); mchunkptr next = chunk_plus_offset(p, sz); do_check_any_chunk(m, p); assert(!is_inuse(p)); assert(!next_pinuse(p)); assert (!is_mmapped(p)); if (p != m->dv && p != m->top) { if (sz >= MIN_CHUNK_SIZE) { assert((sz & CHUNK_ALIGN_MASK) == 0); assert(is_aligned(chunk2mem(p))); assert(next->prev_foot == sz); assert(pinuse(p)); assert (next == m->top || is_inuse(next)); assert(p->fd->bk == p); assert(p->bk->fd == p); } else /* markers are always of size SIZE_T_SIZE */ assert(sz == SIZE_T_SIZE); } } /* Check properties of malloced chunks at the point they are malloced */ static void do_check_malloced_chunk(mstate m, void* mem, size_t s) { if (mem != 0) { mchunkptr p = mem2chunk(mem); size_t sz = p->head & ~INUSE_BITS; do_check_inuse_chunk(m, p); assert((sz & CHUNK_ALIGN_MASK) == 0); assert(sz >= MIN_CHUNK_SIZE); assert(sz >= s); /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); } } /* Check a tree and its subtrees. */ static void do_check_tree(mstate m, tchunkptr t) { tchunkptr head = 0; tchunkptr u = t; bindex_t tindex = t->index; size_t tsize = chunksize(t); bindex_t idx; compute_tree_index(tsize, idx); assert(tindex == idx); assert(tsize >= MIN_LARGE_SIZE); assert(tsize >= minsize_for_tree_index(idx)); assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); do { /* traverse through chain of same-sized nodes */ do_check_any_chunk(m, ((mchunkptr)u)); assert(u->index == tindex); assert(chunksize(u) == tsize); assert(!is_inuse(u)); assert(!next_pinuse(u)); assert(u->fd->bk == u); assert(u->bk->fd == u); if (u->parent == 0) { assert(u->child[0] == 0); assert(u->child[1] == 0); } else { assert(head == 0); /* only one node on chain has parent */ head = u; assert(u->parent != u); assert (u->parent->child[0] == u || u->parent->child[1] == u || *((tbinptr*)(u->parent)) == u); if (u->child[0] != 0) { assert(u->child[0]->parent == u); assert(u->child[0] != u); do_check_tree(m, u->child[0]); } if (u->child[1] != 0) { assert(u->child[1]->parent == u); assert(u->child[1] != u); do_check_tree(m, u->child[1]); } if (u->child[0] != 0 && u->child[1] != 0) { assert(chunksize(u->child[0]) < chunksize(u->child[1])); } } u = u->fd; } while (u != t); assert(head != 0); } /* Check all the chunks in a treebin. */ static void do_check_treebin(mstate m, bindex_t i) { tbinptr* tb = treebin_at(m, i); tchunkptr t = *tb; int empty = (m->treemap & (1U << i)) == 0; if (t == 0) assert(empty); if (!empty) do_check_tree(m, t); } /* Check all the chunks in a smallbin. */ static void do_check_smallbin(mstate m, bindex_t i) { sbinptr b = smallbin_at(m, i); mchunkptr p = b->bk; unsigned int empty = (m->smallmap & (1U << i)) == 0; if (p == b) assert(empty); if (!empty) { for (; p != b; p = p->bk) { size_t size = chunksize(p); mchunkptr q; /* each chunk claims to be free */ do_check_free_chunk(m, p); /* chunk belongs in bin */ assert(small_index(size) == i); assert(p->bk == b || chunksize(p->bk) == chunksize(p)); /* chunk is followed by an inuse chunk */ q = next_chunk(p); if (q->head != FENCEPOST_HEAD) do_check_inuse_chunk(m, q); } } } /* Find x in a bin. Used in other check functions. */ static int bin_find(mstate m, mchunkptr x) { size_t size = chunksize(x); if (is_small(size)) { bindex_t sidx = small_index(size); sbinptr b = smallbin_at(m, sidx); if (smallmap_is_marked(m, sidx)) { mchunkptr p = b; do { if (p == x) return 1; } while ((p = p->fd) != b); } } else { bindex_t tidx; compute_tree_index(size, tidx); if (treemap_is_marked(m, tidx)) { tchunkptr t = *treebin_at(m, tidx); size_t sizebits = size << leftshift_for_tree_index(tidx); while (t != 0 && chunksize(t) != size) { t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; sizebits <<= 1; } if (t != 0) { tchunkptr u = t; do { if (u == (tchunkptr)x) return 1; } while ((u = u->fd) != t); } } } return 0; } /* Traverse each chunk and check it; return total */ static size_t traverse_and_check(mstate m) { size_t sum = 0; if (is_initialized(m)) { msegmentptr s = &m->seg; sum += m->topsize + TOP_FOOT_SIZE; while (s != 0) { mchunkptr q = align_as_chunk(s->base); mchunkptr lastq = 0; assert(pinuse(q)); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { sum += chunksize(q); if (is_inuse(q)) { assert(!bin_find(m, q)); do_check_inuse_chunk(m, q); } else { assert(q == m->dv || bin_find(m, q)); assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ do_check_free_chunk(m, q); } lastq = q; q = next_chunk(q); } s = s->next; } } return sum; } /* Check all properties of malloc_state. */ static void do_check_malloc_state(mstate m) { bindex_t i; size_t total; /* check bins */ for (i = 0; i < NSMALLBINS; ++i) do_check_smallbin(m, i); for (i = 0; i < NTREEBINS; ++i) do_check_treebin(m, i); if (m->dvsize != 0) { /* check dv chunk */ do_check_any_chunk(m, m->dv); assert(m->dvsize == chunksize(m->dv)); assert(m->dvsize >= MIN_CHUNK_SIZE); assert(bin_find(m, m->dv) == 0); } if (m->top != 0) { /* check top chunk */ do_check_top_chunk(m, m->top); /*assert(m->topsize == chunksize(m->top)); redundant */ assert(m->topsize > 0); assert(bin_find(m, m->top) == 0); } total = traverse_and_check(m); assert(total <= m->footprint); assert(m->footprint <= m->max_footprint); } #endif /* DEBUG */ /* ----------------------------- statistics ------------------------------ */ #if !NO_MALLINFO static struct mallinfo internal_mallinfo(mstate m) { struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; ensure_initialization(); if (!PREACTION(m)) { check_malloc_state(m); if (is_initialized(m)) { size_t nfree = SIZE_T_ONE; /* top always free */ size_t mfree = m->topsize + TOP_FOOT_SIZE; size_t sum = mfree; msegmentptr s = &m->seg; while (s != 0) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { size_t sz = chunksize(q); sum += sz; if (!is_inuse(q)) { mfree += sz; ++nfree; } q = next_chunk(q); } s = s->next; } nm.arena = sum; nm.ordblks = nfree; nm.hblkhd = m->footprint - sum; nm.usmblks = m->max_footprint; nm.uordblks = m->footprint - mfree; nm.fordblks = mfree; nm.keepcost = m->topsize; } POSTACTION(m); } return nm; } #endif /* !NO_MALLINFO */ #if !NO_MALLOC_STATS static void internal_malloc_stats(mstate m) { ensure_initialization(); if (!PREACTION(m)) { size_t maxfp = 0; size_t fp = 0; size_t used = 0; check_malloc_state(m); if (is_initialized(m)) { msegmentptr s = &m->seg; maxfp = m->max_footprint; fp = m->footprint; used = fp - (m->topsize + TOP_FOOT_SIZE); while (s != 0) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { if (!is_inuse(q)) used -= chunksize(q); q = next_chunk(q); } s = s->next; } } POSTACTION(m); /* drop lock */ fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); } } #endif /* NO_MALLOC_STATS */ /* ----------------------- Operations on smallbins ----------------------- */ /* Various forms of linking and unlinking are defined as macros. Even the ones for trees, which are very long but have very short typical paths. This is ugly but reduces reliance on inlining support of compilers. */ /* Link a free chunk into a smallbin */ #define insert_small_chunk(M, P, S) {\ bindex_t I = small_index(S);\ mchunkptr B = smallbin_at(M, I);\ mchunkptr F = B;\ assert(S >= MIN_CHUNK_SIZE);\ if (!smallmap_is_marked(M, I))\ mark_smallmap(M, I);\ else if (RTCHECK(ok_address(M, B->fd)))\ F = B->fd;\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ B->fd = P;\ F->bk = P;\ P->fd = F;\ P->bk = B;\ } /* Unlink a chunk from a smallbin */ #define unlink_small_chunk(M, P, S) {\ mchunkptr F = P->fd;\ mchunkptr B = P->bk;\ bindex_t I = small_index(S);\ assert(P != B);\ assert(P != F);\ assert(chunksize(P) == small_index2size(I));\ if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \ if (B == F) {\ clear_smallmap(M, I);\ }\ else if (RTCHECK(B == smallbin_at(M,I) ||\ (ok_address(M, B) && B->fd == P))) {\ F->bk = B;\ B->fd = F;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ } /* Unlink the first chunk from a smallbin */ #define unlink_first_small_chunk(M, B, P, I) {\ mchunkptr F = P->fd;\ assert(P != B);\ assert(P != F);\ assert(chunksize(P) == small_index2size(I));\ if (B == F) {\ clear_smallmap(M, I);\ }\ else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\ F->bk = B;\ B->fd = F;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ } /* Replace dv node, binning the old one */ /* Used only when dvsize known to be small */ #define replace_dv(M, P, S) {\ size_t DVS = M->dvsize;\ assert(is_small(DVS));\ if (DVS != 0) {\ mchunkptr DV = M->dv;\ insert_small_chunk(M, DV, DVS);\ }\ M->dvsize = S;\ M->dv = P;\ } /* ------------------------- Operations on trees ------------------------- */ /* Insert chunk into tree */ #define insert_large_chunk(M, X, S) {\ tbinptr* H;\ bindex_t I;\ compute_tree_index(S, I);\ H = treebin_at(M, I);\ X->index = I;\ X->child[0] = X->child[1] = 0;\ if (!treemap_is_marked(M, I)) {\ mark_treemap(M, I);\ *H = X;\ X->parent = (tchunkptr)H;\ X->fd = X->bk = X;\ }\ else {\ tchunkptr T = *H;\ size_t K = S << leftshift_for_tree_index(I);\ for (;;) {\ if (chunksize(T) != S) {\ tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ K <<= 1;\ if (*C != 0)\ T = *C;\ else if (RTCHECK(ok_address(M, C))) {\ *C = X;\ X->parent = T;\ X->fd = X->bk = X;\ break;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ break;\ }\ }\ else {\ tchunkptr F = T->fd;\ if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ T->fd = F->bk = X;\ X->fd = F;\ X->bk = T;\ X->parent = 0;\ break;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ break;\ }\ }\ }\ }\ } /* Unlink steps: 1. If x is a chained node, unlink it from its same-sized fd/bk links and choose its bk node as its replacement. 2. If x was the last node of its size, but not a leaf node, it must be replaced with a leaf node (not merely one with an open left or right), to make sure that lefts and rights of descendents correspond properly to bit masks. We use the rightmost descendent of x. We could use any other leaf, but this is easy to locate and tends to counteract removal of leftmosts elsewhere, and so keeps paths shorter than minimally guaranteed. This doesn't loop much because on average a node in a tree is near the bottom. 3. If x is the base of a chain (i.e., has parent links) relink x's parent and children to x's replacement (or null if none). */ #define unlink_large_chunk(M, X) {\ tchunkptr XP = X->parent;\ tchunkptr R;\ if (X->bk != X) {\ tchunkptr F = X->fd;\ R = X->bk;\ if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\ F->bk = R;\ R->fd = F;\ }\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ else {\ tchunkptr* RP;\ if (((R = *(RP = &(X->child[1]))) != 0) ||\ ((R = *(RP = &(X->child[0]))) != 0)) {\ tchunkptr* CP;\ while ((*(CP = &(R->child[1])) != 0) ||\ (*(CP = &(R->child[0])) != 0)) {\ R = *(RP = CP);\ }\ if (RTCHECK(ok_address(M, RP)))\ *RP = 0;\ else {\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ }\ if (XP != 0) {\ tbinptr* H = treebin_at(M, X->index);\ if (X == *H) {\ if ((*H = R) == 0) \ clear_treemap(M, X->index);\ }\ else if (RTCHECK(ok_address(M, XP))) {\ if (XP->child[0] == X) \ XP->child[0] = R;\ else \ XP->child[1] = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ if (R != 0) {\ if (RTCHECK(ok_address(M, R))) {\ tchunkptr C0, C1;\ R->parent = XP;\ if ((C0 = X->child[0]) != 0) {\ if (RTCHECK(ok_address(M, C0))) {\ R->child[0] = C0;\ C0->parent = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ if ((C1 = X->child[1]) != 0) {\ if (RTCHECK(ok_address(M, C1))) {\ R->child[1] = C1;\ C1->parent = R;\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ else\ CORRUPTION_ERROR_ACTION(M);\ }\ }\ } /* Relays to large vs small bin operations */ #define insert_chunk(M, P, S)\ if (is_small(S)) insert_small_chunk(M, P, S)\ else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } #define unlink_chunk(M, P, S)\ if (is_small(S)) unlink_small_chunk(M, P, S)\ else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } /* Relays to internal calls to malloc/free from realloc, memalign etc */ #if ONLY_MSPACES #define internal_malloc(m, b) mspace_malloc(m, b) #define internal_free(m, mem) mspace_free(m,mem); #else /* ONLY_MSPACES */ #if MSPACES #define internal_malloc(m, b)\ ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) #define internal_free(m, mem)\ if (m == gm) dlfree(mem); else mspace_free(m,mem); #else /* MSPACES */ #define internal_malloc(m, b) dlmalloc(b) #define internal_free(m, mem) dlfree(mem) #endif /* MSPACES */ #endif /* ONLY_MSPACES */ /* ----------------------- Direct-mmapping chunks ----------------------- */ /* Directly mmapped chunks are set up with an offset to the start of the mmapped region stored in the prev_foot field of the chunk. This allows reconstruction of the required argument to MUNMAP when freed, and also allows adjustment of the returned chunk to meet alignment requirements (especially in memalign). */ /* Malloc using mmap */ static void* mmap_alloc(mstate m, size_t nb) { size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); if (m->footprint_limit != 0) { size_t fp = m->footprint + mmsize; if (fp <= m->footprint || fp > m->footprint_limit) return 0; } if (mmsize > nb) { /* Check for wrap around 0 */ char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - MMAP_FOOT_PAD; mchunkptr p = (mchunkptr)(mm + offset); p->prev_foot = offset; p->head = psize; mark_inuse_foot(m, p, psize); chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; if (m->least_addr == 0 || mm < m->least_addr) m->least_addr = mm; if ((m->footprint += mmsize) > m->max_footprint) m->max_footprint = m->footprint; assert(is_aligned(chunk2mem(p))); check_mmapped_chunk(m, p); return chunk2mem(p); } } return 0; } /* Realloc using mmap */ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) { size_t oldsize = chunksize(oldp); (void)flags; /* placate people compiling -Wunused */ if (is_small(nb)) /* Can't shrink mmap regions below small size */ return 0; /* Keep old chunk if big enough but not too big */ if (oldsize >= nb + SIZE_T_SIZE && (oldsize - nb) <= (mparams.granularity << 1)) return oldp; else { size_t offset = oldp->prev_foot; size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); char* cp = (char*)CALL_MREMAP((char*)oldp - offset, oldmmsize, newmmsize, flags); if (cp != CMFAIL) { mchunkptr newp = (mchunkptr)(cp + offset); size_t psize = newmmsize - offset - MMAP_FOOT_PAD; newp->head = psize; mark_inuse_foot(m, newp, psize); chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; if (cp < m->least_addr) m->least_addr = cp; if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) m->max_footprint = m->footprint; check_mmapped_chunk(m, newp); return newp; } } return 0; } /* -------------------------- mspace management -------------------------- */ /* Initialize top chunk and its size */ static void init_top(mstate m, mchunkptr p, size_t psize) { /* Ensure alignment */ size_t offset = align_offset(chunk2mem(p)); p = (mchunkptr)((char*)p + offset); psize -= offset; m->top = p; m->topsize = psize; p->head = psize | PINUSE_BIT; /* set size of fake trailing chunk holding overhead space only once */ chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; m->trim_check = mparams.trim_threshold; /* reset on each update */ } /* Initialize bins for a new mstate that is otherwise zeroed out */ static void init_bins(mstate m) { /* Establish circular links for smallbins */ bindex_t i; for (i = 0; i < NSMALLBINS; ++i) { sbinptr bin = smallbin_at(m,i); bin->fd = bin->bk = bin; } } #if PROCEED_ON_ERROR /* default corruption action */ static void reset_on_error(mstate m) { int i; ++malloc_corruption_error_count; /* Reinitialize fields to forget about all memory */ m->smallmap = m->treemap = 0; m->dvsize = m->topsize = 0; m->seg.base = 0; m->seg.size = 0; m->seg.next = 0; m->top = m->dv = 0; for (i = 0; i < NTREEBINS; ++i) *treebin_at(m, i) = 0; init_bins(m); } #endif /* PROCEED_ON_ERROR */ /* Allocate chunk and prepend remainder with chunk in successor base. */ static void* prepend_alloc(mstate m, char* newbase, char* oldbase, size_t nb) { mchunkptr p = align_as_chunk(newbase); mchunkptr oldfirst = align_as_chunk(oldbase); size_t psize = (char*)oldfirst - (char*)p; mchunkptr q = chunk_plus_offset(p, nb); size_t qsize = psize - nb; set_size_and_pinuse_of_inuse_chunk(m, p, nb); assert((char*)oldfirst > (char*)q); assert(pinuse(oldfirst)); assert(qsize >= MIN_CHUNK_SIZE); /* consolidate remainder with first chunk of old base */ if (oldfirst == m->top) { size_t tsize = m->topsize += qsize; m->top = q; q->head = tsize | PINUSE_BIT; check_top_chunk(m, q); } else if (oldfirst == m->dv) { size_t dsize = m->dvsize += qsize; m->dv = q; set_size_and_pinuse_of_free_chunk(q, dsize); } else { if (!is_inuse(oldfirst)) { size_t nsize = chunksize(oldfirst); unlink_chunk(m, oldfirst, nsize); oldfirst = chunk_plus_offset(oldfirst, nsize); qsize += nsize; } set_free_with_pinuse(q, qsize, oldfirst); insert_chunk(m, q, qsize); check_free_chunk(m, q); } check_malloced_chunk(m, chunk2mem(p), nb); return chunk2mem(p); } /* Add a segment to hold a new noncontiguous region */ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { /* Determine locations and sizes of segment, fenceposts, old top */ char* old_top = (char*)m->top; msegmentptr oldsp = segment_holding(m, old_top); char* old_end = oldsp->base + oldsp->size; size_t ssize = pad_request(sizeof(struct malloc_segment)); char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); size_t offset = align_offset(chunk2mem(rawsp)); char* asp = rawsp + offset; char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; mchunkptr sp = (mchunkptr)csp; msegmentptr ss = (msegmentptr)(chunk2mem(sp)); mchunkptr tnext = chunk_plus_offset(sp, ssize); mchunkptr p = tnext; int nfences = 0; /* reset top to new space */ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); /* Set up segment record */ assert(is_aligned(ss)); set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); *ss = m->seg; /* Push current record */ m->seg.base = tbase; m->seg.size = tsize; m->seg.sflags = mmapped; m->seg.next = ss; /* Insert trailing fenceposts */ for (;;) { mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); p->head = FENCEPOST_HEAD; ++nfences; if ((char*)(&(nextp->head)) < old_end) p = nextp; else break; } assert(nfences >= 2); /* Insert the rest of old top into a bin as an ordinary free chunk */ if (csp != old_top) { mchunkptr q = (mchunkptr)old_top; size_t psize = csp - old_top; mchunkptr tn = chunk_plus_offset(q, psize); set_free_with_pinuse(q, psize, tn); insert_chunk(m, q, psize); } check_top_chunk(m, m->top); } /* -------------------------- System allocation -------------------------- */ /* Get memory from system using MORECORE or MMAP */ static void* sys_alloc(mstate m, size_t nb) { char* tbase = CMFAIL; size_t tsize = 0; flag_t mmap_flag = 0; size_t asize; /* allocation size */ ensure_initialization(); /* Directly map large chunks, but only if already initialized */ if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { void* mem = mmap_alloc(m, nb); if (mem != 0) return mem; } asize = granularity_align(nb + SYS_ALLOC_PADDING); if (asize <= nb) return 0; /* wraparound */ if (m->footprint_limit != 0) { size_t fp = m->footprint + asize; if (fp <= m->footprint || fp > m->footprint_limit) return 0; } /* Try getting memory in any of three ways (in most-preferred to least-preferred order): 1. A call to MORECORE that can normally contiguously extend memory. (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or or main space is mmapped or a previous contiguous call failed) 2. A call to MMAP new space (disabled if not HAVE_MMAP). Note that under the default settings, if MORECORE is unable to fulfill a request, and HAVE_MMAP is true, then mmap is used as a noncontiguous system allocator. This is a useful backup strategy for systems with holes in address spaces -- in this case sbrk cannot contiguously expand the heap, but mmap may be able to find space. 3. A call to MORECORE that cannot usually contiguously extend memory. (disabled if not HAVE_MORECORE) In all cases, we need to request enough bytes from system to ensure we can malloc nb bytes upon success, so pad with enough space for top_foot, plus alignment-pad to make sure we don't lose bytes if not on boundary, and round this up to a granularity unit. */ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { char* br = CMFAIL; size_t ssize = asize; /* sbrk call size */ msegmentptr ss = (m->top == 0)? (void *)0 : segment_holding(m, (char*)m->top); ACQUIRE_MALLOC_GLOBAL_LOCK(); if (ss == 0) { /* First time through or recovery */ char* base = (char*)CALL_MORECORE(0); if (base != CMFAIL) { size_t fp; /* Adjust to end on a page boundary */ if (!is_page_aligned(base)) ssize += (page_align((size_t)base) - (size_t)base); fp = m->footprint + ssize; /* recheck limits */ if (ssize > nb && ssize < HALF_MAX_SIZE_T && (m->footprint_limit == 0 || (fp > m->footprint && fp <= m->footprint_limit)) && (br = (char*)(CALL_MORECORE(ssize))) == base) { tbase = base; tsize = ssize; } } } else { /* Subtract out existing available top space from MORECORE request. */ ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); /* Use mem here only if it did continuously extend old space */ if (ssize < HALF_MAX_SIZE_T && (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) { tbase = br; tsize = ssize; } } if (tbase == CMFAIL) { /* Cope with partial failure */ if (br != CMFAIL) { /* Try to use/extend the space we did get */ if (ssize < HALF_MAX_SIZE_T && ssize < nb + SYS_ALLOC_PADDING) { size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize); if (esize < HALF_MAX_SIZE_T) { char* end = (char*)CALL_MORECORE(esize); if (end != CMFAIL) ssize += esize; else { /* Can't use; try to release */ (void) CALL_MORECORE(-ssize); br = CMFAIL; } } } } if (br != CMFAIL) { /* Use the space we did get */ tbase = br; tsize = ssize; } else disable_contiguous(m); /* Don't try contiguous path in the future */ } RELEASE_MALLOC_GLOBAL_LOCK(); } if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ char* mp = (char*)(CALL_MMAP(asize)); if (mp != CMFAIL) { tbase = mp; tsize = asize; mmap_flag = USE_MMAP_BIT; } } if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ if (asize < HALF_MAX_SIZE_T) { char* br = CMFAIL; char* end = CMFAIL; ACQUIRE_MALLOC_GLOBAL_LOCK(); br = (char*)(CALL_MORECORE(asize)); end = (char*)(CALL_MORECORE(0)); RELEASE_MALLOC_GLOBAL_LOCK(); if (br != CMFAIL && end != CMFAIL && br < end) { size_t ssize = end - br; if (ssize > nb + TOP_FOOT_SIZE) { tbase = br; tsize = ssize; } } } } if (tbase != CMFAIL) { if ((m->footprint += tsize) > m->max_footprint) m->max_footprint = m->footprint; if (!is_initialized(m)) { /* first-time initialization */ if (m->least_addr == 0 || tbase < m->least_addr) m->least_addr = tbase; m->seg.base = tbase; m->seg.size = tsize; m->seg.sflags = mmap_flag; m->magic = mparams.magic; m->release_checks = MAX_RELEASE_CHECK_RATE; init_bins(m); #if !ONLY_MSPACES if (is_global(m)) init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); else #endif { /* Offset top by embedded malloc_state */ mchunkptr mn = next_chunk(mem2chunk(m)); init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); } } else { /* Try to merge with an existing segment */ msegmentptr sp = &m->seg; /* Only consider most recent segment if traversal suppressed */ while (sp != 0 && tbase != sp->base + sp->size) sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && (sp->sflags & USE_MMAP_BIT) == mmap_flag && segment_holds(sp, m->top)) { /* append */ sp->size += tsize; init_top(m, m->top, m->topsize + tsize); } else { if (tbase < m->least_addr) m->least_addr = tbase; sp = &m->seg; while (sp != 0 && sp->base != tbase + tsize) sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && (sp->sflags & USE_MMAP_BIT) == mmap_flag) { char* oldbase = sp->base; sp->base = tbase; sp->size += tsize; return prepend_alloc(m, tbase, oldbase, nb); } else add_segment(m, tbase, tsize, mmap_flag); } } if (nb < m->topsize) { /* Allocate from new or extended top space */ size_t rsize = m->topsize -= nb; mchunkptr p = m->top; mchunkptr r = m->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(m, p, nb); check_top_chunk(m, m->top); check_malloced_chunk(m, chunk2mem(p), nb); return chunk2mem(p); } } MALLOC_FAILURE_ACTION; return 0; } /* ----------------------- system deallocation -------------------------- */ /* Unmap and unlink any mmapped segments that don't contain used chunks */ static size_t release_unused_segments(mstate m) { size_t released = 0; int nsegs = 0; msegmentptr pred = &m->seg; msegmentptr sp = pred->next; while (sp != 0) { char* base = sp->base; size_t size = sp->size; msegmentptr next = sp->next; ++nsegs; if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { mchunkptr p = align_as_chunk(base); size_t psize = chunksize(p); /* Can unmap if first chunk holds entire segment and not pinned */ if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { tchunkptr tp = (tchunkptr)p; assert(segment_holds(sp, (char*)sp)); if (p == m->dv) { m->dv = 0; m->dvsize = 0; } else { unlink_large_chunk(m, tp); } if (CALL_MUNMAP(base, size) == 0) { released += size; m->footprint -= size; /* unlink obsoleted record */ sp = pred; sp->next = next; } else { /* back out if cannot unmap */ insert_large_chunk(m, tp, psize); } } } if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ break; pred = sp; sp = next; } /* Reset check counter */ m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)? (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE); return released; } static int sys_trim(mstate m, size_t pad) { size_t released = 0; ensure_initialization(); if (pad < MAX_REQUEST && is_initialized(m)) { pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ if (m->topsize > pad) { /* Shrink top space in granularity-size units, keeping at least one */ size_t unit = mparams.granularity; size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - SIZE_T_ONE) * unit; msegmentptr sp = segment_holding(m, (char*)m->top); if (!is_extern_segment(sp)) { if (is_mmapped_segment(sp)) { if (HAVE_MMAP && sp->size >= extra && !has_segment_link(m, sp)) { /* can't shrink if pinned */ size_t newsize = sp->size - extra; (void)newsize; /* placate people compiling -Wunused-variable */ /* Prefer mremap, fall back to munmap */ if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { released = extra; } } } else if (HAVE_MORECORE) { if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; ACQUIRE_MALLOC_GLOBAL_LOCK(); { /* Make sure end of memory is where we last set it. */ char* old_br = (char*)(CALL_MORECORE(0)); if (old_br == sp->base + sp->size) { char* rel_br = (char*)(CALL_MORECORE(-extra)); char* new_br = (char*)(CALL_MORECORE(0)); if (rel_br != CMFAIL && new_br < old_br) released = old_br - new_br; } } RELEASE_MALLOC_GLOBAL_LOCK(); } } if (released != 0) { sp->size -= released; m->footprint -= released; init_top(m, m->top, m->topsize - released); check_top_chunk(m, m->top); } } /* Unmap any unused mmapped segments */ if (HAVE_MMAP) released += release_unused_segments(m); /* On failure, disable autotrim to avoid repeated failed future calls */ if (released == 0 && m->topsize > m->trim_check) m->trim_check = MAX_SIZE_T; } return (released != 0)? 1 : 0; } /* Consolidate and bin a chunk. Differs from exported versions of free mainly in that the chunk need not be marked as inuse. */ static void dispose_chunk(mstate m, mchunkptr p, size_t psize) { mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { mchunkptr prev; size_t prevsize = p->prev_foot; if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) m->footprint -= psize; return; } prev = chunk_minus_offset(p, prevsize); psize += prevsize; p = prev; if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */ if (p != m->dv) { unlink_chunk(m, p, prevsize); } else if ((next->head & INUSE_BITS) == INUSE_BITS) { m->dvsize = psize; set_free_with_pinuse(p, psize, next); return; } } else { CORRUPTION_ERROR_ACTION(m); return; } } if (RTCHECK(ok_address(m, next))) { if (!cinuse(next)) { /* consolidate forward */ if (next == m->top) { size_t tsize = m->topsize += psize; m->top = p; p->head = tsize | PINUSE_BIT; if (p == m->dv) { m->dv = 0; m->dvsize = 0; } return; } else if (next == m->dv) { size_t dsize = m->dvsize += psize; m->dv = p; set_size_and_pinuse_of_free_chunk(p, dsize); return; } else { size_t nsize = chunksize(next); psize += nsize; unlink_chunk(m, next, nsize); set_size_and_pinuse_of_free_chunk(p, psize); if (p == m->dv) { m->dvsize = psize; return; } } } else { set_free_with_pinuse(p, psize, next); } insert_chunk(m, p, psize); } else { CORRUPTION_ERROR_ACTION(m); } } /* ---------------------------- malloc --------------------------- */ /* allocate a large request from the best fitting chunk in a treebin */ static void* tmalloc_large(mstate m, size_t nb) { tchunkptr v = 0; size_t rsize = -nb; /* Unsigned negation */ tchunkptr t; bindex_t idx; compute_tree_index(nb, idx); if ((t = *treebin_at(m, idx)) != 0) { /* Traverse tree for this bin looking for node with size == nb */ size_t sizebits = nb << leftshift_for_tree_index(idx); tchunkptr rst = 0; /* The deepest untaken right subtree */ for (;;) { tchunkptr rt; size_t trem = chunksize(t) - nb; if (trem < rsize) { v = t; if ((rsize = trem) == 0) break; } rt = t->child[1]; t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; if (rt != 0 && rt != t) rst = rt; if (t == 0) { t = rst; /* set t to least subtree holding sizes > nb */ break; } sizebits <<= 1; } } if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; if (leftbits != 0) { bindex_t i; binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); t = *treebin_at(m, i); } } while (t != 0) { /* find smallest of tree or subtree */ size_t trem = chunksize(t) - nb; if (trem < rsize) { rsize = trem; v = t; } t = leftmost_child(t); } /* If dv is a better fit, return 0 so malloc will use it */ if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { if (RTCHECK(ok_address(m, v))) { /* split */ mchunkptr r = chunk_plus_offset(v, nb); assert(chunksize(v) == rsize + nb); if (RTCHECK(ok_next(v, r))) { unlink_large_chunk(m, v); if (rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(m, v, (rsize + nb)); else { set_size_and_pinuse_of_inuse_chunk(m, v, nb); set_size_and_pinuse_of_free_chunk(r, rsize); insert_chunk(m, r, rsize); } return chunk2mem(v); } } CORRUPTION_ERROR_ACTION(m); } return 0; } /* allocate a small request from the best fitting chunk in a treebin */ static void* tmalloc_small(mstate m, size_t nb) { tchunkptr t, v; size_t rsize; bindex_t i; binmap_t leastbit = least_bit(m->treemap); compute_bit2idx(leastbit, i); v = t = *treebin_at(m, i); rsize = chunksize(t) - nb; while ((t = leftmost_child(t)) != 0) { size_t trem = chunksize(t) - nb; if (trem < rsize) { rsize = trem; v = t; } } if (RTCHECK(ok_address(m, v))) { mchunkptr r = chunk_plus_offset(v, nb); assert(chunksize(v) == rsize + nb); if (RTCHECK(ok_next(v, r))) { unlink_large_chunk(m, v); if (rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(m, v, (rsize + nb)); else { set_size_and_pinuse_of_inuse_chunk(m, v, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(m, r, rsize); } return chunk2mem(v); } } CORRUPTION_ERROR_ACTION(m); return 0; } #if !ONLY_MSPACES void* dlmalloc(size_t bytes) { /* Basic algorithm: If a small request (< 256 bytes minus per-chunk overhead): 1. If one exists, use a remainderless chunk in associated smallbin. (Remainderless means that there are too few excess bytes to represent as a chunk.) 2. If it is big enough, use the dv chunk, which is normally the chunk adjacent to the one used for the most recent small request. 3. If one exists, split the smallest available chunk in a bin, saving remainder in dv. 4. If it is big enough, use the top chunk. 5. If available, get memory from system and use it Otherwise, for a large request: 1. Find the smallest available binned chunk that fits, and use it if it is better fitting than dv chunk, splitting if necessary. 2. If better fitting than any binned chunk, use the dv chunk. 3. If it is big enough, use the top chunk. 4. If request size >= mmap threshold, try to directly mmap this chunk. 5. If available, get memory from system and use it The ugly goto's here ensure that postaction occurs along all paths. */ #if USE_LOCKS ensure_initialization(); /* initialize in sys_alloc if not using locks */ #endif if (!PREACTION(gm)) { void* mem; size_t nb; if (bytes <= MAX_SMALL_REQUEST) { bindex_t idx; binmap_t smallbits; nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); idx = small_index(nb); smallbits = gm->smallmap >> idx; if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ mchunkptr b, p; idx += ~smallbits & 1; /* Uses next bin if idx empty */ b = smallbin_at(gm, idx); p = b->fd; assert(chunksize(p) == small_index2size(idx)); unlink_first_small_chunk(gm, b, p, idx); set_inuse_and_pinuse(gm, p, small_index2size(idx)); mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (nb > gm->dvsize) { if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ mchunkptr b, p, r; size_t rsize; bindex_t i; binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); b = smallbin_at(gm, i); p = b->fd; assert(chunksize(p) == small_index2size(i)); unlink_first_small_chunk(gm, b, p, i); rsize = small_index2size(i) - nb; /* Fit here cannot be remainderless if 4byte sizes */ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(gm, p, small_index2size(i)); else { set_size_and_pinuse_of_inuse_chunk(gm, p, nb); r = chunk_plus_offset(p, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(gm, r, rsize); } mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { check_malloced_chunk(gm, mem, nb); goto postaction; } } } else if (bytes >= MAX_REQUEST) nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ else { nb = pad_request(bytes); if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { check_malloced_chunk(gm, mem, nb); goto postaction; } } if (nb <= gm->dvsize) { size_t rsize = gm->dvsize - nb; mchunkptr p = gm->dv; if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ mchunkptr r = gm->dv = chunk_plus_offset(p, nb); gm->dvsize = rsize; set_size_and_pinuse_of_free_chunk(r, rsize); set_size_and_pinuse_of_inuse_chunk(gm, p, nb); } else { /* exhaust dv */ size_t dvs = gm->dvsize; gm->dvsize = 0; gm->dv = 0; set_inuse_and_pinuse(gm, p, dvs); } mem = chunk2mem(p); check_malloced_chunk(gm, mem, nb); goto postaction; } else if (nb < gm->topsize) { /* Split top */ size_t rsize = gm->topsize -= nb; mchunkptr p = gm->top; mchunkptr r = gm->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(gm, p, nb); mem = chunk2mem(p); check_top_chunk(gm, gm->top); check_malloced_chunk(gm, mem, nb); goto postaction; } mem = sys_alloc(gm, nb); postaction: POSTACTION(gm); return mem; } return 0; } /* ---------------------------- free --------------------------- */ void dlfree(void* mem) { /* Consolidate freed chunks with preceeding or succeeding bordering free chunks, if they exist, and then place in a bin. Intermixed with special cases for top, dv, mmapped chunks, and usage errors. */ if (mem != 0) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); if (!ok_magic(fm)) { USAGE_ERROR_ACTION(fm, p); return; } #else /* FOOTERS */ #define fm gm #endif /* FOOTERS */ if (!PREACTION(fm)) { check_inuse_chunk(fm, p); if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; goto postaction; } else { mchunkptr prev = chunk_minus_offset(p, prevsize); psize += prevsize; p = prev; if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ if (p != fm->dv) { unlink_chunk(fm, p, prevsize); } else if ((next->head & INUSE_BITS) == INUSE_BITS) { fm->dvsize = psize; set_free_with_pinuse(p, psize, next); goto postaction; } } else goto erroraction; } } if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { if (!cinuse(next)) { /* consolidate forward */ if (next == fm->top) { size_t tsize = fm->topsize += psize; fm->top = p; p->head = tsize | PINUSE_BIT; if (p == fm->dv) { fm->dv = 0; fm->dvsize = 0; } if (should_trim(fm, tsize)) sys_trim(fm, 0); goto postaction; } else if (next == fm->dv) { size_t dsize = fm->dvsize += psize; fm->dv = p; set_size_and_pinuse_of_free_chunk(p, dsize); goto postaction; } else { size_t nsize = chunksize(next); psize += nsize; unlink_chunk(fm, next, nsize); set_size_and_pinuse_of_free_chunk(p, psize); if (p == fm->dv) { fm->dvsize = psize; goto postaction; } } } else set_free_with_pinuse(p, psize, next); if (is_small(psize)) { insert_small_chunk(fm, p, psize); check_free_chunk(fm, p); } else { tchunkptr tp = (tchunkptr)p; insert_large_chunk(fm, tp, psize); check_free_chunk(fm, p); if (--fm->release_checks == 0) release_unused_segments(fm); } goto postaction; } } erroraction: USAGE_ERROR_ACTION(fm, p); postaction: POSTACTION(fm); } } #if !FOOTERS #undef fm #endif /* FOOTERS */ } void* dlcalloc(size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; if (n_elements != 0) { req = n_elements * elem_size; if (((n_elements | elem_size) & ~(size_t)0xffff) && (req / n_elements != elem_size)) req = MAX_SIZE_T; /* force downstream failure on overflow */ } mem = dlmalloc(req); if (mem != 0 && calloc_must_clear(mem2chunk(mem))) memset(mem, 0, req); return mem; } #endif /* !ONLY_MSPACES */ /* ------------ Internal support for realloc, memalign, etc -------------- */ /* Try to realloc; only in-place unless can_move true */ static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, int can_move) { mchunkptr newp = 0; size_t oldsize = chunksize(p); mchunkptr next = chunk_plus_offset(p, oldsize); if (RTCHECK(ok_address(m, p) && ok_inuse(p) && ok_next(p, next) && ok_pinuse(next))) { if (is_mmapped(p)) { newp = mmap_resize(m, p, nb, can_move); } else if (oldsize >= nb) { /* already big enough */ size_t rsize = oldsize - nb; if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */ mchunkptr r = chunk_plus_offset(p, nb); set_inuse(m, p, nb); set_inuse(m, r, rsize); dispose_chunk(m, r, rsize); } newp = p; } else if (next == m->top) { /* extend into top */ if (oldsize + m->topsize > nb) { size_t newsize = oldsize + m->topsize; size_t newtopsize = newsize - nb; mchunkptr newtop = chunk_plus_offset(p, nb); set_inuse(m, p, nb); newtop->head = newtopsize |PINUSE_BIT; m->top = newtop; m->topsize = newtopsize; newp = p; } } else if (next == m->dv) { /* extend into dv */ size_t dvs = m->dvsize; if (oldsize + dvs >= nb) { size_t dsize = oldsize + dvs - nb; if (dsize >= MIN_CHUNK_SIZE) { mchunkptr r = chunk_plus_offset(p, nb); mchunkptr n = chunk_plus_offset(r, dsize); set_inuse(m, p, nb); set_size_and_pinuse_of_free_chunk(r, dsize); clear_pinuse(n); m->dvsize = dsize; m->dv = r; } else { /* exhaust dv */ size_t newsize = oldsize + dvs; set_inuse(m, p, newsize); m->dvsize = 0; m->dv = 0; } newp = p; } } else if (!cinuse(next)) { /* extend into next free chunk */ size_t nextsize = chunksize(next); if (oldsize + nextsize >= nb) { size_t rsize = oldsize + nextsize - nb; unlink_chunk(m, next, nextsize); if (rsize < MIN_CHUNK_SIZE) { size_t newsize = oldsize + nextsize; set_inuse(m, p, newsize); } else { mchunkptr r = chunk_plus_offset(p, nb); set_inuse(m, p, nb); set_inuse(m, r, rsize); dispose_chunk(m, r, rsize); } newp = p; } } } else { USAGE_ERROR_ACTION(m, chunk2mem(p)); } return newp; } static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { void* mem = 0; if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ alignment = MIN_CHUNK_SIZE; if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ size_t a = MALLOC_ALIGNMENT << 1; while (a < alignment) a <<= 1; alignment = a; } if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ MALLOC_FAILURE_ACTION; } } else { size_t nb = request2size(bytes); size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; mem = internal_malloc(m, req); if (mem != 0) { mchunkptr p = mem2chunk(mem); if (PREACTION(m)) return 0; if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */ /* Find an aligned spot inside chunk. Since we need to give back leading space in a chunk of at least MIN_CHUNK_SIZE, if the first calculation places us at a spot with less than MIN_CHUNK_SIZE leader, we can move to the next aligned spot. We've allocated enough total room so that this is always possible. */ char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment - SIZE_T_ONE)) & -alignment)); char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? br : br+alignment; mchunkptr newp = (mchunkptr)pos; size_t leadsize = pos - (char*)(p); size_t newsize = chunksize(p) - leadsize; if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ newp->prev_foot = p->prev_foot + leadsize; newp->head = newsize; } else { /* Otherwise, give back leader, use the rest */ set_inuse(m, newp, newsize); set_inuse(m, p, leadsize); dispose_chunk(m, p, leadsize); } p = newp; } /* Give back spare room at the end */ if (!is_mmapped(p)) { size_t size = chunksize(p); if (size > nb + MIN_CHUNK_SIZE) { size_t remainder_size = size - nb; mchunkptr remainder = chunk_plus_offset(p, nb); set_inuse(m, p, nb); set_inuse(m, remainder, remainder_size); dispose_chunk(m, remainder, remainder_size); } } mem = chunk2mem(p); assert (chunksize(p) >= nb); assert(((size_t)mem & (alignment - 1)) == 0); check_inuse_chunk(m, p); POSTACTION(m); } } return mem; } /* Common support for independent_X routines, handling all of the combinations that can result. The opts arg has: bit 0 set if all elements are same size (using sizes[0]) bit 1 set if elements should be zeroed */ static void** ialloc(mstate m, size_t n_elements, size_t* sizes, int opts, void* chunks[]) { size_t element_size; /* chunksize of each element, if all same */ size_t contents_size; /* total size of elements */ size_t array_size; /* request size of pointer array */ void* mem; /* malloced aggregate space */ mchunkptr p; /* corresponding chunk */ size_t remainder_size; /* remaining bytes while splitting */ void** marray; /* either "chunks" or malloced ptr array */ mchunkptr array_chunk; /* chunk for malloced ptr array */ flag_t was_enabled; /* to disable mmap */ size_t size; size_t i; ensure_initialization(); /* compute array length, if needed */ if (chunks != 0) { if (n_elements == 0) return chunks; /* nothing to do */ marray = chunks; array_size = 0; } else { /* if empty req, must still return chunk representing empty array */ if (n_elements == 0) return (void**)internal_malloc(m, 0); marray = 0; array_size = request2size(n_elements * (sizeof(void*))); } /* compute total element size */ if (opts & 0x1) { /* all-same-size */ element_size = request2size(*sizes); contents_size = n_elements * element_size; } else { /* add up all the sizes */ element_size = 0; contents_size = 0; for (i = 0; i != n_elements; ++i) contents_size += request2size(sizes[i]); } size = contents_size + array_size; /* Allocate the aggregate chunk. First disable direct-mmapping so malloc won't use it, since we would not be able to later free/realloc space internal to a segregated mmap region. */ was_enabled = use_mmap(m); disable_mmap(m); mem = internal_malloc(m, size - CHUNK_OVERHEAD); if (was_enabled) enable_mmap(m); if (mem == 0) return 0; if (PREACTION(m)) return 0; p = mem2chunk(mem); remainder_size = chunksize(p); assert(!is_mmapped(p)); if (opts & 0x2) { /* optionally clear the elements */ memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); } /* If not provided, allocate the pointer array as final part of chunk */ if (marray == 0) { size_t array_chunk_size; array_chunk = chunk_plus_offset(p, contents_size); array_chunk_size = remainder_size - contents_size; marray = (void**) (chunk2mem(array_chunk)); set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); remainder_size = contents_size; } /* split out elements */ for (i = 0; ; ++i) { marray[i] = chunk2mem(p); if (i != n_elements-1) { if (element_size != 0) size = element_size; else size = request2size(sizes[i]); remainder_size -= size; set_size_and_pinuse_of_inuse_chunk(m, p, size); p = chunk_plus_offset(p, size); } else { /* the final element absorbs any overallocation slop */ set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); break; } } #if DEBUG if (marray != chunks) { /* final element must have exactly exhausted chunk */ if (element_size != 0) { assert(remainder_size == element_size); } else { assert(remainder_size == request2size(sizes[i])); } check_inuse_chunk(m, mem2chunk(marray)); } for (i = 0; i != n_elements; ++i) check_inuse_chunk(m, mem2chunk(marray[i])); #endif /* DEBUG */ POSTACTION(m); return marray; } /* Try to free all pointers in the given array. Note: this could be made faster, by delaying consolidation, at the price of disabling some user integrity checks, We still optimize some consolidations by combining adjacent chunks before freeing, which will occur often if allocated with ialloc or the array is sorted. */ static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) { size_t unfreed = 0; if (!PREACTION(m)) { void** a; void** fence = &(array[nelem]); for (a = array; a != fence; ++a) { void* mem = *a; if (mem != 0) { mchunkptr p = mem2chunk(mem); size_t psize = chunksize(p); #if FOOTERS if (get_mstate_for(p) != m) { ++unfreed; continue; } #endif check_inuse_chunk(m, p); *a = 0; if (RTCHECK(ok_address(m, p) && ok_inuse(p))) { void ** b = a + 1; /* try to merge with next chunk */ mchunkptr next = next_chunk(p); if (b != fence && *b == chunk2mem(next)) { size_t newsize = chunksize(next) + psize; set_inuse(m, p, newsize); *b = chunk2mem(p); } else dispose_chunk(m, p, psize); } else { CORRUPTION_ERROR_ACTION(m); break; } } } if (should_trim(m, m->topsize)) sys_trim(m, 0); POSTACTION(m); } return unfreed; } /* Traversal */ #if MALLOC_INSPECT_ALL static void internal_inspect_all(mstate m, void(*handler)(void *start, void *end, size_t used_bytes, void* callback_arg), void* arg) { if (is_initialized(m)) { mchunkptr top = m->top; msegmentptr s; for (s = &m->seg; s != 0; s = s->next) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) { mchunkptr next = next_chunk(q); size_t sz = chunksize(q); size_t used; void* start; if (is_inuse(q)) { used = sz - CHUNK_OVERHEAD; /* must not be mmapped */ start = chunk2mem(q); } else { used = 0; if (is_small(sz)) { /* offset by possible bookkeeping */ start = (void*)((char*)q + sizeof(struct malloc_chunk)); } else { start = (void*)((char*)q + sizeof(struct malloc_tree_chunk)); } } if (start < (void*)next) /* skip if all space is bookkeeping */ handler(start, next, used, arg); if (q == top) break; q = next; } } } } #endif /* MALLOC_INSPECT_ALL */ /* ------------------ Exported realloc, memalign, etc -------------------- */ #if !ONLY_MSPACES void* dlrealloc(void* oldmem, size_t bytes) { void* mem = 0; if (oldmem == 0) { mem = dlmalloc(bytes); } else if (bytes >= MAX_REQUEST) { MALLOC_FAILURE_ACTION; } #ifdef REALLOC_ZERO_BYTES_FREES else if (bytes == 0) { dlfree(oldmem); } #endif /* REALLOC_ZERO_BYTES_FREES */ else { size_t nb = request2size(bytes); mchunkptr oldp = mem2chunk(oldmem); #if ! FOOTERS mstate m = gm; #else /* FOOTERS */ mstate m = get_mstate_for(oldp); if (!ok_magic(m)) { USAGE_ERROR_ACTION(m, oldmem); return 0; } #endif /* FOOTERS */ if (!PREACTION(m)) { mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); POSTACTION(m); if (newp != 0) { check_inuse_chunk(m, newp); mem = chunk2mem(newp); } else { mem = internal_malloc(m, bytes); if (mem != 0) { size_t oc = chunksize(oldp) - overhead_for(oldp); memcpy(mem, oldmem, (oc < bytes)? oc : bytes); internal_free(m, oldmem); } } } } return mem; } void* dlrealloc_in_place(void* oldmem, size_t bytes) { void* mem = 0; if (oldmem != 0) { if (bytes >= MAX_REQUEST) { MALLOC_FAILURE_ACTION; } else { size_t nb = request2size(bytes); mchunkptr oldp = mem2chunk(oldmem); #if ! FOOTERS mstate m = gm; #else /* FOOTERS */ mstate m = get_mstate_for(oldp); if (!ok_magic(m)) { USAGE_ERROR_ACTION(m, oldmem); return 0; } #endif /* FOOTERS */ if (!PREACTION(m)) { mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); POSTACTION(m); if (newp == oldp) { check_inuse_chunk(m, newp); mem = oldmem; } } } } return mem; } void* dlmemalign(size_t alignment, size_t bytes) { if (alignment <= MALLOC_ALIGNMENT) { return dlmalloc(bytes); } return internal_memalign(gm, alignment, bytes); } int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { void* mem = 0; if (alignment == MALLOC_ALIGNMENT) mem = dlmalloc(bytes); else { size_t d = alignment / sizeof(void*); size_t r = alignment % sizeof(void*); if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0) return EINVAL; else if (bytes <= MAX_REQUEST - alignment) { if (alignment < MIN_CHUNK_SIZE) alignment = MIN_CHUNK_SIZE; mem = internal_memalign(gm, alignment, bytes); } } if (mem == 0) return ENOMEM; else { *pp = mem; return 0; } } void* dlvalloc(size_t bytes) { size_t pagesz; ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, bytes); } void* dlpvalloc(size_t bytes) { size_t pagesz; ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); } void** dlindependent_calloc(size_t n_elements, size_t elem_size, void* chunks[]) { size_t sz = elem_size; /* serves as 1-element array */ return ialloc(gm, n_elements, &sz, 3, chunks); } void** dlindependent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]) { return ialloc(gm, n_elements, sizes, 0, chunks); } size_t dlbulk_free(void* array[], size_t nelem) { return internal_bulk_free(gm, array, nelem); } #if MALLOC_INSPECT_ALL void dlmalloc_inspect_all(void(*handler)(void *start, void *end, size_t used_bytes, void* callback_arg), void* arg) { ensure_initialization(); if (!PREACTION(gm)) { internal_inspect_all(gm, handler, arg); POSTACTION(gm); } } #endif /* MALLOC_INSPECT_ALL */ int dlmalloc_trim(size_t pad) { int result = 0; ensure_initialization(); if (!PREACTION(gm)) { result = sys_trim(gm, pad); POSTACTION(gm); } return result; } size_t dlmalloc_footprint(void) { return gm->footprint; } size_t dlmalloc_max_footprint(void) { return gm->max_footprint; } size_t dlmalloc_footprint_limit(void) { size_t maf = gm->footprint_limit; return maf == 0 ? MAX_SIZE_T : maf; } size_t dlmalloc_set_footprint_limit(size_t bytes) { size_t result; /* invert sense of 0 */ if (bytes == 0) result = granularity_align(1); /* Use minimal size */ if (bytes == MAX_SIZE_T) result = 0; /* disable */ else result = granularity_align(bytes); return gm->footprint_limit = result; } #if !NO_MALLINFO struct mallinfo dlmallinfo(void) { return internal_mallinfo(gm); } #endif /* NO_MALLINFO */ #if !NO_MALLOC_STATS void dlmalloc_stats() { internal_malloc_stats(gm); } #endif /* NO_MALLOC_STATS */ int dlmallopt(int param_number, int value) { return change_mparam(param_number, value); } size_t dlmalloc_usable_size(void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); if (is_inuse(p)) return chunksize(p) - overhead_for(p); } return 0; } #endif /* !ONLY_MSPACES */ /* ----------------------------- user mspaces ---------------------------- */ #if MSPACES static mstate init_user_mstate(char* tbase, size_t tsize) { size_t msize = pad_request(sizeof(struct malloc_state)); mchunkptr mn; mchunkptr msp = align_as_chunk(tbase); mstate m = (mstate)(chunk2mem(msp)); memset(m, 0, msize); (void)INITIAL_LOCK(&m->mutex); msp->head = (msize|INUSE_BITS); m->seg.base = m->least_addr = tbase; m->seg.size = m->footprint = m->max_footprint = tsize; m->magic = mparams.magic; m->release_checks = MAX_RELEASE_CHECK_RATE; m->mflags = mparams.default_mflags; m->extp = 0; m->exts = 0; disable_contiguous(m); init_bins(m); mn = next_chunk(mem2chunk(m)); init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); check_top_chunk(m, m->top); return m; } mspace create_mspace(size_t capacity, int locked) { mstate m = 0; size_t msize; ensure_initialization(); msize = pad_request(sizeof(struct malloc_state)); if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { size_t rs = ((capacity == 0)? mparams.granularity : (capacity + TOP_FOOT_SIZE + msize)); size_t tsize = granularity_align(rs); char* tbase = (char*)(CALL_MMAP(tsize)); if (tbase != CMFAIL) { m = init_user_mstate(tbase, tsize); m->seg.sflags = USE_MMAP_BIT; set_lock(m, locked); } } return (mspace)m; } mspace create_mspace_with_base(void* base, size_t capacity, int locked) { mstate m = 0; size_t msize; ensure_initialization(); msize = pad_request(sizeof(struct malloc_state)); if (capacity > msize + TOP_FOOT_SIZE && capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { m = init_user_mstate((char*)base, capacity); m->seg.sflags = EXTERN_BIT; set_lock(m, locked); } return (mspace)m; } int mspace_track_large_chunks(mspace msp, int enable) { int ret = 0; mstate ms = (mstate)msp; if (!PREACTION(ms)) { if (!use_mmap(ms)) { ret = 1; } if (!enable) { enable_mmap(ms); } else { disable_mmap(ms); } POSTACTION(ms); } return ret; } size_t destroy_mspace(mspace msp) { size_t freed = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { msegmentptr sp = &ms->seg; (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */ while (sp != 0) { char* base = sp->base; size_t size = sp->size; flag_t flag = sp->sflags; (void)base; /* placate people compiling -Wunused-variable */ sp = sp->next; if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && CALL_MUNMAP(base, size) == 0) freed += size; } } else { USAGE_ERROR_ACTION(ms,ms); } return freed; } /* mspace versions of routines are near-clones of the global versions. This is not so nice but better than the alternatives. */ void* mspace_malloc(mspace msp, size_t bytes) { mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } if (!PREACTION(ms)) { void* mem; size_t nb; if (bytes <= MAX_SMALL_REQUEST) { bindex_t idx; binmap_t smallbits; nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); idx = small_index(nb); smallbits = ms->smallmap >> idx; if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ mchunkptr b, p; idx += ~smallbits & 1; /* Uses next bin if idx empty */ b = smallbin_at(ms, idx); p = b->fd; assert(chunksize(p) == small_index2size(idx)); unlink_first_small_chunk(ms, b, p, idx); set_inuse_and_pinuse(ms, p, small_index2size(idx)); mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (nb > ms->dvsize) { if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ mchunkptr b, p, r; size_t rsize; bindex_t i; binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); binmap_t leastbit = least_bit(leftbits); compute_bit2idx(leastbit, i); b = smallbin_at(ms, i); p = b->fd; assert(chunksize(p) == small_index2size(i)); unlink_first_small_chunk(ms, b, p, i); rsize = small_index2size(i) - nb; /* Fit here cannot be remainderless if 4byte sizes */ if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) set_inuse_and_pinuse(ms, p, small_index2size(i)); else { set_size_and_pinuse_of_inuse_chunk(ms, p, nb); r = chunk_plus_offset(p, nb); set_size_and_pinuse_of_free_chunk(r, rsize); replace_dv(ms, r, rsize); } mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { check_malloced_chunk(ms, mem, nb); goto postaction; } } } else if (bytes >= MAX_REQUEST) nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */ else { nb = pad_request(bytes); if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { check_malloced_chunk(ms, mem, nb); goto postaction; } } if (nb <= ms->dvsize) { size_t rsize = ms->dvsize - nb; mchunkptr p = ms->dv; if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ mchunkptr r = ms->dv = chunk_plus_offset(p, nb); ms->dvsize = rsize; set_size_and_pinuse_of_free_chunk(r, rsize); set_size_and_pinuse_of_inuse_chunk(ms, p, nb); } else { /* exhaust dv */ size_t dvs = ms->dvsize; ms->dvsize = 0; ms->dv = 0; set_inuse_and_pinuse(ms, p, dvs); } mem = chunk2mem(p); check_malloced_chunk(ms, mem, nb); goto postaction; } else if (nb < ms->topsize) { /* Split top */ size_t rsize = ms->topsize -= nb; mchunkptr p = ms->top; mchunkptr r = ms->top = chunk_plus_offset(p, nb); r->head = rsize | PINUSE_BIT; set_size_and_pinuse_of_inuse_chunk(ms, p, nb); mem = chunk2mem(p); check_top_chunk(ms, ms->top); check_malloced_chunk(ms, mem, nb); goto postaction; } mem = sys_alloc(ms, nb); postaction: POSTACTION(ms); return mem; } return 0; } void mspace_free(mspace msp, void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); (void)msp; /* placate people compiling -Wunused */ #else /* FOOTERS */ mstate fm = (mstate)msp; #endif /* FOOTERS */ if (!ok_magic(fm)) { USAGE_ERROR_ACTION(fm, p); return; } if (!PREACTION(fm)) { check_inuse_chunk(fm, p); if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; goto postaction; } else { mchunkptr prev = chunk_minus_offset(p, prevsize); psize += prevsize; p = prev; if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ if (p != fm->dv) { unlink_chunk(fm, p, prevsize); } else if ((next->head & INUSE_BITS) == INUSE_BITS) { fm->dvsize = psize; set_free_with_pinuse(p, psize, next); goto postaction; } } else goto erroraction; } } if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { if (!cinuse(next)) { /* consolidate forward */ if (next == fm->top) { size_t tsize = fm->topsize += psize; fm->top = p; p->head = tsize | PINUSE_BIT; if (p == fm->dv) { fm->dv = 0; fm->dvsize = 0; } if (should_trim(fm, tsize)) sys_trim(fm, 0); goto postaction; } else if (next == fm->dv) { size_t dsize = fm->dvsize += psize; fm->dv = p; set_size_and_pinuse_of_free_chunk(p, dsize); goto postaction; } else { size_t nsize = chunksize(next); psize += nsize; unlink_chunk(fm, next, nsize); set_size_and_pinuse_of_free_chunk(p, psize); if (p == fm->dv) { fm->dvsize = psize; goto postaction; } } } else set_free_with_pinuse(p, psize, next); if (is_small(psize)) { insert_small_chunk(fm, p, psize); check_free_chunk(fm, p); } else { tchunkptr tp = (tchunkptr)p; insert_large_chunk(fm, tp, psize); check_free_chunk(fm, p); if (--fm->release_checks == 0) release_unused_segments(fm); } goto postaction; } } erroraction: USAGE_ERROR_ACTION(fm, p); postaction: POSTACTION(fm); } } } void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { void* mem; size_t req = 0; mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } if (n_elements != 0) { req = n_elements * elem_size; if (((n_elements | elem_size) & ~(size_t)0xffff) && (req / n_elements != elem_size)) req = MAX_SIZE_T; /* force downstream failure on overflow */ } mem = internal_malloc(ms, req); if (mem != 0 && calloc_must_clear(mem2chunk(mem))) memset(mem, 0, req); return mem; } void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { void* mem = 0; if (oldmem == 0) { mem = mspace_malloc(msp, bytes); } else if (bytes >= MAX_REQUEST) { MALLOC_FAILURE_ACTION; } #ifdef REALLOC_ZERO_BYTES_FREES else if (bytes == 0) { mspace_free(msp, oldmem); } #endif /* REALLOC_ZERO_BYTES_FREES */ else { size_t nb = request2size(bytes); mchunkptr oldp = mem2chunk(oldmem); #if ! FOOTERS mstate m = (mstate)msp; #else /* FOOTERS */ mstate m = get_mstate_for(oldp); if (!ok_magic(m)) { USAGE_ERROR_ACTION(m, oldmem); return 0; } #endif /* FOOTERS */ if (!PREACTION(m)) { mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); POSTACTION(m); if (newp != 0) { check_inuse_chunk(m, newp); mem = chunk2mem(newp); } else { mem = mspace_malloc(m, bytes); if (mem != 0) { size_t oc = chunksize(oldp) - overhead_for(oldp); memcpy(mem, oldmem, (oc < bytes)? oc : bytes); mspace_free(m, oldmem); } } } } return mem; } void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { void* mem = 0; if (oldmem != 0) { if (bytes >= MAX_REQUEST) { MALLOC_FAILURE_ACTION; } else { size_t nb = request2size(bytes); mchunkptr oldp = mem2chunk(oldmem); #if ! FOOTERS mstate m = (mstate)msp; #else /* FOOTERS */ mstate m = get_mstate_for(oldp); (void)msp; /* placate people compiling -Wunused */ if (!ok_magic(m)) { USAGE_ERROR_ACTION(m, oldmem); return 0; } #endif /* FOOTERS */ if (!PREACTION(m)) { mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); POSTACTION(m); if (newp == oldp) { check_inuse_chunk(m, newp); mem = oldmem; } } } } return mem; } void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } if (alignment <= MALLOC_ALIGNMENT) return mspace_malloc(msp, bytes); return internal_memalign(ms, alignment, bytes); } void** mspace_independent_calloc(mspace msp, size_t n_elements, size_t elem_size, void* chunks[]) { size_t sz = elem_size; /* serves as 1-element array */ mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } return ialloc(ms, n_elements, &sz, 3, chunks); } void** mspace_independent_comalloc(mspace msp, size_t n_elements, size_t sizes[], void* chunks[]) { mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); return 0; } return ialloc(ms, n_elements, sizes, 0, chunks); } size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) { return internal_bulk_free((mstate)msp, array, nelem); } #if MALLOC_INSPECT_ALL void mspace_inspect_all(mspace msp, void(*handler)(void *start, void *end, size_t used_bytes, void* callback_arg), void* arg) { mstate ms = (mstate)msp; if (ok_magic(ms)) { if (!PREACTION(ms)) { internal_inspect_all(ms, handler, arg); POSTACTION(ms); } } else { USAGE_ERROR_ACTION(ms,ms); } } #endif /* MALLOC_INSPECT_ALL */ int mspace_trim(mspace msp, size_t pad) { int result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { if (!PREACTION(ms)) { result = sys_trim(ms, pad); POSTACTION(ms); } } else { USAGE_ERROR_ACTION(ms,ms); } return result; } #if !NO_MALLOC_STATS void mspace_malloc_stats(mspace msp) { mstate ms = (mstate)msp; if (ok_magic(ms)) { internal_malloc_stats(ms); } else { USAGE_ERROR_ACTION(ms,ms); } } #endif /* NO_MALLOC_STATS */ size_t mspace_footprint(mspace msp) { size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { result = ms->footprint; } else { USAGE_ERROR_ACTION(ms,ms); } return result; } size_t mspace_max_footprint(mspace msp) { size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { result = ms->max_footprint; } else { USAGE_ERROR_ACTION(ms,ms); } return result; } size_t mspace_footprint_limit(mspace msp) { size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { size_t maf = ms->footprint_limit; result = (maf == 0) ? MAX_SIZE_T : maf; } else { USAGE_ERROR_ACTION(ms,ms); } return result; } size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { if (bytes == 0) result = granularity_align(1); /* Use minimal size */ if (bytes == MAX_SIZE_T) result = 0; /* disable */ else result = granularity_align(bytes); ms->footprint_limit = result; } else { USAGE_ERROR_ACTION(ms,ms); } return result; } #if !NO_MALLINFO struct mallinfo mspace_mallinfo(mspace msp) { mstate ms = (mstate)msp; if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms,ms); } return internal_mallinfo(ms); } #endif /* NO_MALLINFO */ size_t mspace_usable_size(const void* mem) { if (mem != 0) { mchunkptr p = mem2chunk(mem); if (is_inuse(p)) return chunksize(p) - overhead_for(p); } return 0; } int mspace_mallopt(int param_number, int value) { return change_mparam(param_number, value); } #endif /* MSPACES */ /* -------------------- Alternative MORECORE functions ------------------- */ /* Guidelines for creating a custom version of MORECORE: * For best performance, MORECORE should allocate in multiples of pagesize. * MORECORE may allocate more memory than requested. (Or even less, but this will usually result in a malloc failure.) * MORECORE must not allocate memory when given argument zero, but instead return one past the end address of memory from previous nonzero call. * For best performance, consecutive calls to MORECORE with positive arguments should return increasing addresses, indicating that space has been contiguously extended. * Even though consecutive calls to MORECORE need not return contiguous addresses, it must be OK for malloc'ed chunks to span multiple regions in those cases where they do happen to be contiguous. * MORECORE need not handle negative arguments -- it may instead just return MFAIL when given negative arguments. Negative arguments are always multiples of pagesize. MORECORE must not misinterpret negative args as large positive unsigned args. You can suppress all such calls from even occurring by defining MORECORE_CANNOT_TRIM, As an example alternative MORECORE, here is a custom allocator kindly contributed for pre-OSX macOS. It uses virtually but not necessarily physically contiguous non-paged memory (locked in, present and won't get swapped out). You can use it by uncommenting this section, adding some #includes, and setting up the appropriate defines above: #define MORECORE osMoreCore There is also a shutdown routine that should somehow be called for cleanup upon program exit. #define MAX_POOL_ENTRIES 100 #define MINIMUM_MORECORE_SIZE (64 * 1024U) static int next_os_pool; void *our_os_pools[MAX_POOL_ENTRIES]; void *osMoreCore(int size) { void *ptr = 0; static void *sbrk_top = 0; if (size > 0) { if (size < MINIMUM_MORECORE_SIZE) size = MINIMUM_MORECORE_SIZE; if (CurrentExecutionLevel() == kTaskLevel) ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); if (ptr == 0) { return (void *) MFAIL; } // save ptrs so they can be freed during cleanup our_os_pools[next_os_pool] = ptr; next_os_pool++; ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); sbrk_top = (char *) ptr + size; return ptr; } else if (size < 0) { // we don't currently support shrink behavior return (void *) MFAIL; } else { return sbrk_top; } } // cleanup any allocated memory pools // called as last thing before shutting down driver void osCleanupMem(void) { void **ptr; for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) if (*ptr) { PoolDeallocate(*ptr); *ptr = 0; } } */ /* ----------------------------------------------------------------------- History: v2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea * fix bad comparison in dlposix_memalign * don't reuse adjusted asize in sys_alloc * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion * reduce compiler warnings -- thanks to all who reported/suggested these v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) * Always perform unlink checks unless INSECURE * Add posix_memalign. * Improve realloc to expand in more cases; expose realloc_in_place. Thanks to Peter Buhr for the suggestion. * Add footprint_limit, inspect_all, bulk_free. Thanks to Barry Hayes and others for the suggestions. * Internal refactorings to avoid calls while holding locks * Use non-reentrant locks by default. Thanks to Roland McGrath for the suggestion. * Small fixes to mspace_destroy, reset_on_error. * Various configuration extensions/changes. Thanks to all who contributed these. V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu) * Update Creative Commons URL V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) * Use zeros instead of prev foot for is_mmapped * Add mspace_track_large_chunks; thanks to Jean Brouwers * Fix set_inuse in internal_realloc; thanks to Jean Brouwers * Fix insufficient sys_alloc padding when using 16byte alignment * Fix bad error check in mspace_footprint * Adaptations for ptmalloc; thanks to Wolfram Gloger. * Reentrant spin locks; thanks to Earl Chew and others * Win32 improvements; thanks to Niall Douglas and Earl Chew * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options * Extension hook in malloc_state * Various small adjustments to reduce warnings on some compilers * Various configuration extensions/changes for more platforms. Thanks to all who contributed these. V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) * Add max_footprint functions * Ensure all appropriate literals are size_t * Fix conditional compilation problem for some #define settings * Avoid concatenating segments with the one provided in create_mspace_with_base * Rename some variables to avoid compiler shadowing warnings * Use explicit lock initialization. * Better handling of sbrk interference. * Simplify and fix segment insertion, trimming and mspace_destroy * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x * Thanks especially to Dennis Flanagan for help on these. V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) * Fix memalign brace error. V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) * Fix improper #endif nesting in C++ * Add explicit casts needed for C++ V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) * Use trees for large bins * Support mspaces * Use segments to unify sbrk-based and mmap-based system allocation, removing need for emulation on most platforms without sbrk. * Default safety checks * Optional footer checks. Thanks to William Robertson for the idea. * Internal code refactoring * Incorporate suggestions and platform-specific changes. Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, Aaron Bachmann, Emery Berger, and others. * Speed up non-fastbin processing enough to remove fastbins. * Remove useless cfree() to avoid conflicts with other apps. * Remove internal memcpy, memset. Compilers handle builtins better. * Remove some options that no one ever used and rename others. V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) * Fix malloc_state bitmap array misdeclaration V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) * Allow tuning of FIRST_SORTED_BIN_SIZE * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. * Better detection and support for non-contiguousness of MORECORE. Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger * Bypass most of malloc if no frees. Thanks To Emery Berger. * Fix freeing of old top non-contiguous chunk im sysmalloc. * Raised default trim and map thresholds to 256K. * Fix mmap-related #defines. Thanks to Lubos Lunak. * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. * Branch-free bin calculation * Default trim and mmap thresholds now 256K. V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) * Introduce independent_comalloc and independent_calloc. Thanks to Michael Pachos for motivation and help. * Make optional .h file available * Allow > 2GB requests on 32bit systems. * new WIN32 sbrk, mmap, munmap, lock code from . Thanks also to Andreas Mueller , and Anonymous. * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for helping test this.) * memalign: check alignment arg * realloc: don't try to shift chunks backwards, since this leads to more fragmentation in some programs and doesn't seem to help in any others. * Collect all cases in malloc requiring system memory into sysmalloc * Use mmap as backup to sbrk * Place all internal state in malloc_state * Introduce fastbins (although similar to 2.5.1) * Many minor tunings and cosmetic improvements * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS Thanks to Tony E. Bennett and others. * Include errno.h to support default failure action. V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) * return null for negative arguments * Added Several WIN32 cleanups from Martin C. Fong * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' (e.g. WIN32 platforms) * Cleanup header file inclusion for WIN32 platforms * Cleanup code to avoid Microsoft Visual C++ compiler complaints * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing memory allocation routines * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to usage of 'assert' in non-WIN32 code * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to avoid infinite loop * Always call 'fREe()' rather than 'free()' V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) * Fixed ordering problem with boundary-stamping V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) * Added pvalloc, as recommended by H.J. Liu * Added 64bit pointer support mainly from Wolfram Gloger * Added anonymously donated WIN32 sbrk emulation * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen * malloc_extend_top: fix mask error that caused wastage after foreign sbrks * Add linux mremap support code from HJ Liu V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) * Integrated most documentation with the code. * Add support for mmap, with help from Wolfram Gloger (Gloger@lrz.uni-muenchen.de). * Use last_remainder in more cases. * Pack bins using idea from colin@nyx10.cs.du.edu * Use ordered bins instead of best-fit threshhold * Eliminate block-local decls to simplify tracing and debugging. * Support another case of realloc via move into top * Fix error occuring when initial sbrk_base not word-aligned. * Rely on page size for units instead of SBRK_UNIT to avoid surprises about sbrk alignment conventions. * Add mallinfo, mallopt. Thanks to Raymond Nijssen (raymond@es.ele.tue.nl) for the suggestion. * Add `pad' argument to malloc_trim and top_pad mallopt parameter. * More precautions for cases where other routines call sbrk, courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). * Added macros etc., allowing use in linux libc from H.J. Lu (hjl@gnu.ai.mit.edu) * Inverted this history list V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) * Re-tuned and fixed to behave more nicely with V2.6.0 changes. * Removed all preallocation code since under current scheme the work required to undo bad preallocations exceeds the work saved in good cases for most test programs. * No longer use return list or unconsolidated bins since no scheme using them consistently outperforms those that don't given above changes. * Use best fit for very large chunks to prevent some worst-cases. * Added some support for debugging V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) * Removed footers when chunks are in use. Thanks to Paul Wilson (wilson@cs.texas.edu) for the suggestion. V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) * Added malloc_trim, with help from Wolfram Gloger (wmglo@Dent.MED.Uni-Muenchen.DE). V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) * realloc: try to expand in both directions * malloc: swap order of clean-bin strategy; * realloc: only conditionally expand backwards * Try not to scavenge used bins * Use bin counts as a guide to preallocation * Occasionally bin return list chunks in first scan * Add a few optimizations from colin@nyx10.cs.du.edu V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) * faster bin computation & slightly different binning * merged all consolidations to one part of malloc proper (eliminating old malloc_find_space & malloc_clean_bin) * Scan 2 returns chunks (not just 1) * Propagate failure in realloc if malloc returns 0 * Add stuff to allow compilation on non-ANSI compilers from kpv@research.att.com V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) * removed potential for odd address access in prev_chunk * removed dependency on getpagesize.h * misc cosmetics and a bit more internal documentation * anticosmetics: mangled names in macros to evade debugger strangeness * tested on sparc, hp-700, dec-mips, rs6000 with gcc & native cc (hp, dec only) allowing Detlefs & Zorn comparison study (in SIGPLAN Notices.) Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) * Based loosely on libg++-1.2X malloc. (It retains some of the overall structure of old version, but most details differ.) */ fortran-caffeine-0.7.2/src/dlmalloc/dl_malloc.h0000664000175000017500000005743515162221361021607 0ustar alastairalastair/* Default header file for malloc-2.8.x, written by Doug Lea and released to the public domain, as explained at http://creativecommons.org/publicdomain/zero/1.0/ This header is for ANSI C/C++ only. You can set any of the following #defines before including: * If USE_DL_PREFIX is defined, it is assumed that malloc.c was also compiled with this option, so all routines have names starting with "dl". * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this file will be #included AFTER . This is needed only if your system defines a struct mallinfo that is incompatible with the standard one declared here. Otherwise, you can include this file INSTEAD of your system system . At least on ANSI, all declarations should be compatible with system versions * If MSPACES is defined, declarations for mspace versions are included. */ #ifndef MALLOC_280_H #define MALLOC_280_H #include "dl_malloc_caf.h" #ifdef __cplusplus extern "C" { #endif #include /* for size_t */ #ifndef ONLY_MSPACES #define ONLY_MSPACES 0 /* define to a value */ #elif ONLY_MSPACES != 0 #define ONLY_MSPACES 1 #endif /* ONLY_MSPACES */ #ifndef NO_MALLINFO #define NO_MALLINFO 0 #endif /* NO_MALLINFO */ #ifndef MSPACES #if ONLY_MSPACES #define MSPACES 1 #else /* ONLY_MSPACES */ #define MSPACES 0 #endif /* ONLY_MSPACES */ #endif /* MSPACES */ // YZ: moved mallinfo before "#if !ONLY_MSPACES" because mspace may use mallinfo too! #if !NO_MALLINFO #ifndef HAVE_USR_INCLUDE_MALLOC_H #ifndef _MALLOC_H #ifndef MALLINFO_FIELD_TYPE #define MALLINFO_FIELD_TYPE size_t #endif /* MALLINFO_FIELD_TYPE */ #ifndef STRUCT_MALLINFO_DECLARED #define STRUCT_MALLINFO_DECLARED 1 struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* always 0 */ MALLINFO_FIELD_TYPE hblks; /* always 0 */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ }; #endif /* STRUCT_MALLINFO_DECLARED */ #endif /* _MALLOC_H */ #endif /* HAVE_USR_INCLUDE_MALLOC_H */ #endif /* !NO_MALLINFO */ #if !ONLY_MSPACES #ifndef USE_DL_PREFIX #define dlcalloc calloc #define dlfree free #define dlmalloc malloc #define dlmemalign memalign #define dlposix_memalign posix_memalign #define dlrealloc realloc #define dlvalloc valloc #define dlpvalloc pvalloc #define dlmallinfo mallinfo #define dlmallopt mallopt #define dlmalloc_trim malloc_trim #define dlmalloc_stats malloc_stats #define dlmalloc_usable_size malloc_usable_size #define dlmalloc_footprint malloc_footprint #define dlmalloc_max_footprint malloc_max_footprint #define dlmalloc_footprint_limit malloc_footprint_limit #define dlmalloc_set_footprint_limit malloc_set_footprint_limit #define dlmalloc_inspect_all malloc_inspect_all #define dlindependent_calloc independent_calloc #define dlindependent_comalloc independent_comalloc #define dlbulk_free bulk_free #endif /* USE_DL_PREFIX */ /* malloc(size_t n) Returns a pointer to a newly allocated chunk of at least n bytes, or null if no space is available, in which case errno is set to ENOMEM on ANSI C systems. If n is zero, malloc returns a minimum-sized chunk. (The minimum size is 16 bytes on most 32bit systems, and 32 bytes on 64bit systems.) Note that size_t is an unsigned type, so calls with arguments that would be negative if signed are interpreted as requests for huge amounts of space, which will often fail. The maximum supported value of n differs across systems, but is in all cases less than the maximum representable value of a size_t. */ void* dlmalloc(size_t); /* free(void* p) Releases the chunk of memory pointed to by p, that had been previously allocated using malloc or a related routine such as realloc. It has no effect if p is null. If p was not malloced or already freed, free(p) will by default cuase the current program to abort. */ void dlfree(void*); /* calloc(size_t n_elements, size_t element_size); Returns a pointer to n_elements * element_size bytes, with all locations set to zero. */ void* dlcalloc(size_t, size_t); /* realloc(void* p, size_t n) Returns a pointer to a chunk of size n that contains the same data as does chunk p up to the minimum of (n, p's size) bytes, or null if no space is available. The returned pointer may or may not be the same as p. The algorithm prefers extending p in most cases when possible, otherwise it employs the equivalent of a malloc-copy-free sequence. If p is null, realloc is equivalent to malloc. If space is not available, realloc returns null, errno is set (if on ANSI) and p is NOT freed. if n is for fewer bytes than already held by p, the newly unused space is lopped off and freed if possible. realloc with a size argument of zero (re)allocates a minimum-sized chunk. The old unix realloc convention of allowing the last-free'd chunk to be used as an argument to realloc is not supported. */ void* dlrealloc(void*, size_t); /* realloc_in_place(void* p, size_t n) Resizes the space allocated for p to size n, only if this can be done without moving p (i.e., only if there is adjacent space available if n is greater than p's current allocated size, or n is less than or equal to p's size). This may be used instead of plain realloc if an alternative allocation strategy is needed upon failure to expand space; for example, reallocation of a buffer that must be memory-aligned or cleared. You can use realloc_in_place to trigger these alternatives only when needed. Returns p if successful; otherwise null. */ void* dlrealloc_in_place(void*, size_t); /* memalign(size_t alignment, size_t n); Returns a pointer to a newly allocated chunk of n bytes, aligned in accord with the alignment argument. The alignment argument should be a power of two. If the argument is not a power of two, the nearest greater power is used. 8-byte alignment is guaranteed by normal malloc calls, so don't bother calling memalign with an argument of 8 or less. Overreliance on memalign is a sure way to fragment space. */ void* dlmemalign(size_t, size_t); /* int posix_memalign(void** pp, size_t alignment, size_t n); Allocates a chunk of n bytes, aligned in accord with the alignment argument. Differs from memalign only in that it (1) assigns the allocated memory to *pp rather than returning it, (2) fails and returns EINVAL if the alignment is not a power of two (3) fails and returns ENOMEM if memory cannot be allocated. */ int dlposix_memalign(void**, size_t, size_t); /* valloc(size_t n); Equivalent to memalign(pagesize, n), where pagesize is the page size of the system. If the pagesize is unknown, 4096 is used. */ void* dlvalloc(size_t); /* mallopt(int parameter_number, int parameter_value) Sets tunable parameters The format is to provide a (parameter-number, parameter-value) pair. mallopt then sets the corresponding parameter to the argument value if it can (i.e., so long as the value is meaningful), and returns 1 if successful else 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, normally defined in malloc.h. None of these are use in this malloc, so setting them has no effect. But this malloc also supports other options in mallopt: Symbol param # default allowed param values M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) M_GRANULARITY -2 page size any power of 2 >= page size M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) */ int dlmallopt(int, int); #define M_TRIM_THRESHOLD (-1) #define M_GRANULARITY (-2) #define M_MMAP_THRESHOLD (-3) /* malloc_footprint(); Returns the number of bytes obtained from the system. The total number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. */ size_t dlmalloc_footprint(void); /* malloc_max_footprint(); Returns the maximum number of bytes obtained from the system. This value will be greater than current footprint if deallocated space has been reclaimed by the system. The peak number of bytes allocated by malloc, realloc etc., is less than this value. Unlike mallinfo, this function returns only a precomputed result, so can be called frequently to monitor memory consumption. Even if locks are otherwise defined, this function does not use them, so results might not be up to date. */ size_t dlmalloc_max_footprint(void); /* malloc_footprint_limit(); Returns the number of bytes that the heap is allowed to obtain from the system, returning the last value returned by malloc_set_footprint_limit, or the maximum size_t value if never set. The returned value reflects a permission. There is no guarantee that this number of bytes can actually be obtained from the system. */ size_t dlmalloc_footprint_limit(void); /* malloc_set_footprint_limit(); Sets the maximum number of bytes to obtain from the system, causing failure returns from malloc and related functions upon attempts to exceed this value. The argument value may be subject to page rounding to an enforceable limit; this actual value is returned. Using an argument of the maximum possible size_t effectively disables checks. If the argument is less than or equal to the current malloc_footprint, then all future allocations that require additional system memory will fail. However, invocation cannot retroactively deallocate existing used memory. */ size_t dlmalloc_set_footprint_limit(size_t bytes); /* malloc_inspect_all(void(*handler)(void *start, void *end, size_t used_bytes, void* callback_arg), void* arg); Traverses the heap and calls the given handler for each managed region, skipping all bytes that are (or may be) used for bookkeeping purposes. Traversal does not include include chunks that have been directly memory mapped. Each reported region begins at the start address, and continues up to but not including the end address. The first used_bytes of the region contain allocated data. If used_bytes is zero, the region is unallocated. The handler is invoked with the given callback argument. If locks are defined, they are held during the entire traversal. It is a bad idea to invoke other malloc functions from within the handler. For example, to count the number of in-use chunks with size greater than 1000, you could write: static int count = 0; void count_chunks(void* start, void* end, size_t used, void* arg) { if (used >= 1000) ++count; } then: malloc_inspect_all(count_chunks, NULL); malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. */ void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg); #if !NO_MALLINFO /* mallinfo() Returns (by copy) a struct containing various summary statistics: arena: current total non-mmapped bytes allocated from system ordblks: the number of free chunks smblks: always zero. hblks: current number of mmapped regions hblkhd: total bytes held in mmapped regions usmblks: the maximum total allocated space. This will be greater than current total if trimming has occurred. fsmblks: always zero uordblks: current total allocated space (normal or mmapped) fordblks: total free space keepcost: the maximum number of bytes that could ideally be released back to system via malloc_trim. ("ideally" means that it ignores page restrictions etc.) Because these fields are ints, but internal bookkeeping may be kept as longs, the reported values may wrap around zero and thus be inaccurate. */ struct mallinfo dlmallinfo(void); #endif /* NO_MALLINFO */ /* independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); independent_calloc is similar to calloc, but instead of returning a single cleared space, it returns an array of pointers to n_elements independent elements that can hold contents of size elem_size, each of which starts out cleared, and can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null, which is probably the most typical usage). If it is null, the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. It is filled in with the pointers to the chunks. In either case, independent_calloc returns this pointer array, or null if the allocation failed. If n_elements is zero and "chunks" is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be freed when it is no longer needed. This can be done all at once using bulk_free. independent_calloc simplifies and speeds up implementations of many kinds of pools. It may also be useful when constructing large data structures that initially have a fixed number of fixed-sized nodes, but the number is not known at compile time, and some of the nodes may later need to be freed. For example: struct Node { int item; struct Node* next; }; struct Node* build_list() { struct Node** pool; int n = read_number_of_nodes_needed(); if (n <= 0) return 0; pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); if (pool == 0) die(); // organize into a linked list... struct Node* first = pool[0]; for (i = 0; i < n-1; ++i) pool[i]->next = pool[i+1]; free(pool); // Can now free the array (or not, if it is needed later) return first; } */ void** dlindependent_calloc(size_t, size_t, void**); /* independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); independent_comalloc allocates, all at once, a set of n_elements chunks with sizes indicated in the "sizes" array. It returns an array of pointers to these elements, each of which can be independently freed, realloc'ed etc. The elements are guaranteed to be adjacently allocated (this is not guaranteed to occur with multiple callocs or mallocs), which may also improve cache locality in some applications. The "chunks" argument is optional (i.e., may be null). If it is null the returned array is itself dynamically allocated and should also be freed when it is no longer needed. Otherwise, the chunks array must be of at least n_elements in length. It is filled in with the pointers to the chunks. In either case, independent_comalloc returns this pointer array, or null if the allocation failed. If n_elements is zero and chunks is null, it returns a chunk representing an array with zero elements (which should be freed if not wanted). Each element must be freed when it is no longer needed. This can be done all at once using bulk_free. independent_comallac differs from independent_calloc in that each element may have a different size, and also that it does not automatically clear elements. independent_comalloc can be used to speed up allocation in cases where several structs or objects must always be allocated at the same time. For example: struct Head { ... } struct Foot { ... } void send_message(char* msg) { int msglen = strlen(msg); size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; void* chunks[3]; if (independent_comalloc(3, sizes, chunks) == 0) die(); struct Head* head = (struct Head*)(chunks[0]); char* body = (char*)(chunks[1]); struct Foot* foot = (struct Foot*)(chunks[2]); // ... } In general though, independent_comalloc is worth using only for larger values of n_elements. For small values, you probably won't detect enough difference from series of malloc calls to bother. Overuse of independent_comalloc can increase overall memory usage, since it cannot reuse existing noncontiguous small chunks that might be available for some of the elements. */ void** dlindependent_comalloc(size_t, size_t*, void**); /* bulk_free(void* array[], size_t n_elements) Frees and clears (sets to null) each non-null pointer in the given array. This is likely to be faster than freeing them one-by-one. If footers are used, pointers that have been allocated in different mspaces are not freed or cleared, and the count of all such pointers is returned. For large arrays of pointers with poor locality, it may be worthwhile to sort this array before calling bulk_free. */ size_t dlbulk_free(void**, size_t n_elements); /* pvalloc(size_t n); Equivalent to valloc(minimum-page-that-holds(n)), that is, round up n to nearest pagesize. */ void* dlpvalloc(size_t); /* malloc_trim(size_t pad); If possible, gives memory back to the system (via negative arguments to sbrk) if there is unused memory at the `high' end of the malloc pool or in unused MMAP segments. You can call this after freeing large blocks of memory to potentially reduce the system-level memory requirements of a program. However, it cannot guarantee to reduce memory. Under some allocation patterns, some large free blocks of memory will be locked between two used chunks, so they cannot be given back to the system. The `pad' argument to malloc_trim represents the amount of free trailing space to leave untrimmed. If this argument is zero, only the minimum amount of memory to maintain internal data structures will be left. Non-zero arguments can be supplied to maintain enough trailing space to service future expected allocations without having to re-obtain memory from the system. Malloc_trim returns 1 if it actually released any memory, else 0. */ int dlmalloc_trim(size_t); /* malloc_stats(); Prints on stderr the amount of space obtained from the system (both via sbrk and mmap), the maximum amount (which may be more than current if malloc_trim and/or munmap got called), and the current number of bytes allocated via malloc (or realloc, etc) but not yet freed. Note that this is the number of bytes allocated, not the number requested. It will be larger than the number requested because of alignment and bookkeeping overhead. Because it includes alignment wastage as being in use, this figure may be greater than zero even when no user-level chunks are allocated. The reported current and maximum system memory can be inaccurate if a program makes other calls to system memory allocation functions (normally sbrk) outside of malloc. malloc_stats prints only the most commonly interesting statistics. More information can be obtained by calling mallinfo. malloc_stats is not compiled if NO_MALLOC_STATS is defined. */ void dlmalloc_stats(void); #endif /* !ONLY_MSPACES */ /* malloc_usable_size(void* p); Returns the number of bytes you can actually use in an allocated chunk, which may be more than you requested (although often not) due to alignment and minimum size constraints. You can use this many bytes without worrying about overwriting other allocated objects. This is not a particularly great programming practice. malloc_usable_size can be more useful in debugging and assertions, for example: p = malloc(n); assert(malloc_usable_size(p) >= 256); */ size_t dlmalloc_usable_size(const void*); #if MSPACES /* mspace is an opaque type representing an independent region of space that supports mspace_malloc, etc. */ typedef void* mspace; /* create_mspace creates and returns a new independent space with the given initial capacity, or, if 0, the default granularity size. It returns null if there is no system memory available to create the space. If argument locked is non-zero, the space uses a separate lock to control access. The capacity of the space will grow dynamically as needed to service mspace_malloc requests. You can control the sizes of incremental increases of this space by compiling with a different DEFAULT_GRANULARITY or dynamically setting with mallopt(M_GRANULARITY, value). */ mspace create_mspace(size_t capacity, int locked); /* destroy_mspace destroys the given space, and attempts to return all of its memory back to the system, returning the total number of bytes freed. After destruction, the results of access to all memory used by the space become undefined. */ size_t destroy_mspace(mspace msp); /* create_mspace_with_base uses the memory supplied as the initial base of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping, so the capacity must be at least this large. (Otherwise 0 is returned.) When this initial space is exhausted, additional memory will be obtained from the system. Destroying this space will deallocate all additionally allocated space (if possible) but not the initial base. */ mspace create_mspace_with_base(void* base, size_t capacity, int locked); /* mspace_track_large_chunks controls whether requests for large chunks are allocated in their own untracked mmapped regions, separate from others in this mspace. By default large chunks are not tracked, which reduces fragmentation. However, such chunks are not necessarily released to the system upon destroy_mspace. Enabling tracking by setting to true may increase fragmentation, but avoids leakage when relying on destroy_mspace to release all memory allocated using this space. The function returns the previous setting. */ int mspace_track_large_chunks(mspace msp, int enable); #if !NO_MALLINFO /* mspace_mallinfo behaves as mallinfo, but reports properties of the given space. */ struct mallinfo mspace_mallinfo(mspace msp); #endif /* NO_MALLINFO */ /* An alias for mallopt. */ int mspace_mallopt(int, int); /* The following operate identically to their malloc counterparts but operate only for the given mspace argument */ void* mspace_malloc(mspace msp, size_t bytes); void mspace_free(mspace msp, void* mem); void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); void* mspace_realloc(mspace msp, void* mem, size_t newsize); void* mspace_realloc_in_place(mspace msp, void* mem, size_t newsize); void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); void** mspace_independent_calloc(mspace msp, size_t n_elements, size_t elem_size, void* chunks[]); void** mspace_independent_comalloc(mspace msp, size_t n_elements, size_t sizes[], void* chunks[]); size_t mspace_bulk_free(mspace msp, void**, size_t n_elements); size_t mspace_usable_size(const void* mem); void mspace_malloc_stats(mspace msp); int mspace_trim(mspace msp, size_t pad); size_t mspace_footprint(mspace msp); size_t mspace_max_footprint(mspace msp); size_t mspace_footprint_limit(mspace msp); size_t mspace_set_footprint_limit(mspace msp, size_t bytes); void mspace_inspect_all(mspace msp, void(*handler)(void *, void *, size_t, void*), void* arg); #endif /* MSPACES */ #ifdef __cplusplus } /* end of extern "C" */ #endif #endif /* MALLOC_280_H */ fortran-caffeine-0.7.2/src/prif.F900000664000175000017500000015043215162221361017130 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #include "language-support.F90" module prif use iso_c_binding, only: & c_char, c_int, c_bool, c_intptr_t, c_ptr, & c_funptr, c_size_t, c_ptrdiff_t, c_null_ptr, c_int64_t #if HAVE_LOGICAL64 use iso_fortran_env, only: logical64 #endif #if CAF_IMPORT_ATOMIC_CONSTANTS use iso_fortran_env, only: ATOMIC_INT_KIND, ATOMIC_LOGICAL_KIND #endif #if CAF_IMPORT_STAT_CONSTANTS use iso_fortran_env, only: & STAT_FAILED_IMAGE, STAT_STOPPED_IMAGE, & STAT_LOCKED, STAT_LOCKED_OTHER_IMAGE, & STAT_UNLOCKED, STAT_UNLOCKED_FAILED_IMAGE #endif #if CAF_IMPORT_TEAM_CONSTANTS use iso_fortran_env, only: CURRENT_TEAM, INITIAL_TEAM, PARENT_TEAM #endif implicit none private public :: prif_init public :: prif_register_stop_callback, prif_stop_callback_interface public :: prif_stop, prif_error_stop, prif_fail_image public :: prif_allocate_coarray, prif_allocate, prif_deallocate #if CAF_PRIF_VERSION <= 6 public :: prif_deallocate_coarray #else public :: prif_deallocate_coarray, prif_deallocate_coarrays #endif public :: prif_put, prif_put_indirect, prif_get, prif_get_indirect, prif_put_with_notify, prif_put_with_notify_indirect public :: prif_put_indirect_with_notify, prif_put_indirect_with_notify_indirect public :: prif_get_strided, prif_get_strided_indirect, prif_put_strided, prif_put_strided_indirect public :: prif_put_strided_with_notify, prif_put_strided_with_notify_indirect public :: prif_put_strided_indirect_with_notify, prif_put_strided_indirect_with_notify_indirect public :: prif_alias_create, prif_alias_destroy public :: prif_lcobound_with_dim, prif_lcobound_no_dim, prif_ucobound_with_dim, prif_ucobound_no_dim, prif_coshape public :: prif_image_index, prif_image_index_with_team, prif_image_index_with_team_number public :: prif_initial_team_index, prif_initial_team_index_with_team, prif_initial_team_index_with_team_number public :: prif_this_image_no_coarray, prif_this_image_with_coarray, prif_this_image_with_dim public :: prif_num_images, prif_num_images_with_team, prif_num_images_with_team_number public :: prif_failed_images, prif_stopped_images, prif_image_status public :: prif_local_data_pointer, prif_set_context_data, prif_get_context_data, prif_size_bytes public :: prif_co_sum, prif_co_max, prif_co_min, prif_co_reduce, prif_co_broadcast public :: prif_co_min_character, prif_co_max_character public :: prif_operation_wrapper_interface public :: prif_form_team, prif_change_team, prif_end_team, prif_get_team, prif_team_number public :: prif_sync_all, prif_sync_images, prif_sync_team, prif_sync_memory public :: prif_lock, prif_lock_indirect, prif_unlock, prif_unlock_indirect public :: prif_critical, prif_end_critical public :: prif_event_post, prif_event_post_indirect, prif_event_wait, prif_event_query public :: prif_notify_wait public :: prif_atomic_add, prif_atomic_add_indirect, prif_atomic_and, prif_atomic_and_indirect public :: prif_atomic_or, prif_atomic_or_indirect, prif_atomic_xor, prif_atomic_xor_indirect public :: prif_atomic_cas_int, prif_atomic_cas_int_indirect, prif_atomic_cas_logical, prif_atomic_cas_logical_indirect public :: prif_atomic_fetch_add, prif_atomic_fetch_add_indirect public :: prif_atomic_fetch_and, prif_atomic_fetch_and_indirect, prif_atomic_fetch_or, prif_atomic_fetch_or_indirect public :: prif_atomic_fetch_xor, prif_atomic_fetch_xor_indirect public :: prif_atomic_define_int, prif_atomic_define_int_indirect, prif_atomic_define_logical, prif_atomic_define_logical_indirect public :: prif_atomic_ref_int, prif_atomic_ref_int_indirect, prif_atomic_ref_logical, prif_atomic_ref_logical_indirect integer(c_int), parameter, public :: PRIF_VERSION_MAJOR = CAF_PRIF_VERSION_MAJOR integer(c_int), parameter, public :: PRIF_VERSION_MINOR = CAF_PRIF_VERSION_MINOR #if CAF_IMPORT_ATOMIC_CONSTANTS integer, parameter, public :: PRIF_ATOMIC_INT_KIND = ATOMIC_INT_KIND integer, parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = ATOMIC_LOGICAL_KIND #else integer, parameter, public :: PRIF_ATOMIC_INT_KIND = c_int64_t # if HAVE_SELECTED_LOGICAL_KIND integer, parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = selected_logical_kind(64) # elif HAVE_LOGICAL64 integer, parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = logical64 # else integer, parameter, public :: PRIF_ATOMIC_LOGICAL_KIND = PRIF_ATOMIC_INT_KIND # endif #endif #if CAF_IMPORT_TEAM_CONSTANTS integer(c_int), parameter, public :: & PRIF_CURRENT_TEAM = CURRENT_TEAM, & PRIF_INITIAL_TEAM = INITIAL_TEAM, & PRIF_PARENT_TEAM = PARENT_TEAM #else integer(c_int), parameter, public :: & PRIF_CURRENT_TEAM = 101, & PRIF_INITIAL_TEAM = 102, & PRIF_PARENT_TEAM = 103 #endif #if CAF_IMPORT_STAT_CONSTANTS integer(c_int), parameter, public :: & PRIF_STAT_FAILED_IMAGE = STAT_FAILED_IMAGE, & PRIF_STAT_LOCKED = STAT_LOCKED, & PRIF_STAT_LOCKED_OTHER_IMAGE = STAT_LOCKED_OTHER_IMAGE, & PRIF_STAT_STOPPED_IMAGE = STAT_STOPPED_IMAGE, & PRIF_STAT_UNLOCKED = STAT_UNLOCKED, & PRIF_STAT_UNLOCKED_FAILED_IMAGE = STAT_UNLOCKED_FAILED_IMAGE #else integer(c_int), parameter, public :: & PRIF_STAT_FAILED_IMAGE =-201, & PRIF_STAT_LOCKED = 202, & PRIF_STAT_LOCKED_OTHER_IMAGE = 203, & PRIF_STAT_STOPPED_IMAGE = 204, & PRIF_STAT_UNLOCKED = 205, & PRIF_STAT_UNLOCKED_FAILED_IMAGE = 206 #endif integer(c_int), parameter, public :: & PRIF_STAT_OUT_OF_MEMORY = 301, & PRIF_STAT_ALREADY_INIT = 302 integer(c_int), parameter, private :: & CAF_STAT_INVALID_ARGUMENT = 404 type, public :: prif_event_type private integer(PRIF_ATOMIC_INT_KIND) :: counter = 0 end type type, public :: prif_lock_type private ! TODO: actual implementation integer :: unimplemented_feature_placeholder = 0 end type type, public :: prif_critical_type private ! TODO: actual implementation integer :: unimplemented_feature_placeholder = 0 end type type, public :: prif_notify_type private integer(PRIF_ATOMIC_INT_KIND) :: counter = 0 end type type, public :: prif_coarray_handle private type(prif_coarray_descriptor), pointer :: info end type type, public :: prif_team_type private type(prif_team_descriptor), pointer :: info => null() end type abstract interface subroutine prif_stop_callback_interface( & is_error_stop, quiet, stop_code_int, stop_code_char) import :: c_bool, c_int implicit none logical(c_bool), intent(in) :: is_error_stop, quiet integer(c_int), intent(in), optional :: stop_code_int character(len=*), intent(in), optional :: stop_code_char end subroutine subroutine prif_operation_wrapper_interface(arg1, arg2_and_out, count, cdata) bind(C) import :: c_ptr, c_size_t implicit none type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata end subroutine end interface interface module subroutine prif_init(stat) implicit none integer(c_int), intent(out) :: stat end subroutine module subroutine prif_register_stop_callback(callback) implicit none procedure(prif_stop_callback_interface), pointer, intent(in) :: callback end subroutine module subroutine prif_stop(quiet, stop_code_int, stop_code_char) implicit none logical(c_bool), intent(in) :: quiet integer(c_int), intent(in), optional :: stop_code_int character(len=*), intent(in), optional :: stop_code_char end subroutine module subroutine prif_error_stop(quiet, stop_code_int, stop_code_char) implicit none logical(c_bool), intent(in) :: quiet integer(c_int), intent(in), optional :: stop_code_int character(len=*), intent(in), optional :: stop_code_char end subroutine module subroutine prif_fail_image() implicit none end subroutine module subroutine prif_allocate_coarray( & lcobounds, ucobounds, size_in_bytes, final_func, coarray_handle, & allocated_memory, stat, errmsg, errmsg_alloc) implicit none integer(c_int64_t), dimension(:), intent(in) :: lcobounds, ucobounds integer(c_size_t), intent(in) :: size_in_bytes type(c_funptr), intent(in) :: final_func type(prif_coarray_handle), intent(out) :: coarray_handle type(c_ptr), intent(out) :: allocated_memory integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_allocate(size_in_bytes, allocated_memory, stat, errmsg, errmsg_alloc) implicit none integer(c_size_t) :: size_in_bytes type(c_ptr), intent(out) :: allocated_memory integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine #if CAF_PRIF_VERSION <= 6 module subroutine prif_deallocate_coarray(coarray_handles, stat, errmsg, errmsg_alloc) implicit none type(prif_coarray_handle), intent(in) :: coarray_handles(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine #else module subroutine prif_deallocate_coarray(coarray_handle, stat, errmsg, errmsg_alloc) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_deallocate_coarrays(coarray_handles, stat, errmsg, errmsg_alloc) implicit none type(prif_coarray_handle), intent(in) :: coarray_handles(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine #endif module subroutine prif_deallocate(mem, stat, errmsg, errmsg_alloc) implicit none type(c_ptr), intent(in) :: mem integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put( & image_num, coarray_handle, offset, current_image_buffer, size_in_bytes, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_indirect( & image_num, remote_ptr, current_image_buffer, size_in_bytes, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_with_notify( & image_num, coarray_handle, offset, current_image_buffer, size_in_bytes, & notify_coarray_handle, notify_offset, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes type(prif_coarray_handle), intent(in) :: notify_coarray_handle integer(c_size_t), intent(in) :: notify_offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_with_notify_indirect( & image_num, coarray_handle, offset, current_image_buffer, size_in_bytes, notify_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_intptr_t), intent(in) :: notify_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_indirect_with_notify( & image_num, remote_ptr, current_image_buffer, size_in_bytes, notify_coarray_handle, notify_offset, & stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes type(prif_coarray_handle), intent(in) :: notify_coarray_handle integer(c_size_t), intent(in) :: notify_offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_indirect_with_notify_indirect( & image_num, remote_ptr, current_image_buffer, size_in_bytes, notify_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_intptr_t), intent(in) :: notify_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_get( & image_num, coarray_handle, offset, current_image_buffer, size_in_bytes, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_get_indirect(image_num, remote_ptr, current_image_buffer, size_in_bytes, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr type(c_ptr), intent(in) :: current_image_buffer integer(c_size_t), intent(in) :: size_in_bytes integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_get_strided( & image_num, coarray_handle, offset, remote_stride, current_image_buffer, current_image_stride, & element_size, extent, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_get_strided_indirect( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided( & image_num, coarray_handle, offset, remote_stride, current_image_buffer, current_image_stride, element_size, & extent, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided_indirect( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided_with_notify( & image_num, coarray_handle, offset, remote_stride, current_image_buffer, current_image_stride, element_size, & extent, notify_coarray_handle, notify_offset, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) type(prif_coarray_handle), intent(in) :: notify_coarray_handle integer(c_size_t), intent(in) :: notify_offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided_with_notify_indirect( & image_num, coarray_handle, offset, remote_stride, current_image_buffer, current_image_stride, element_size, & extent, notify_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_intptr_t), intent(in) :: notify_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided_indirect_with_notify( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & notify_coarray_handle, notify_offset, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) type(prif_coarray_handle), intent(in) :: notify_coarray_handle integer(c_size_t), intent(in) :: notify_offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_put_strided_indirect_with_notify_indirect( & image_num, remote_ptr, remote_stride, current_image_buffer, current_image_stride, element_size, extent, & notify_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: remote_ptr integer(c_ptrdiff_t), intent(in) :: remote_stride(:) type(c_ptr), intent(in) :: current_image_buffer integer(c_ptrdiff_t), intent(in) :: current_image_stride(:) integer(c_size_t), intent(in) :: element_size integer(c_size_t), intent(in) :: extent(:) integer(c_intptr_t), intent(in) :: notify_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_alias_create(source_handle, alias_lcobounds, alias_ucobounds, & # if CAF_PRIF_VERSION >= 6 data_pointer_offset, & # endif alias_handle) implicit none type(prif_coarray_handle), intent(in) :: source_handle integer(c_int64_t), intent(in) :: alias_lcobounds(:) integer(c_int64_t), intent(in) :: alias_ucobounds(:) # if CAF_PRIF_VERSION >= 6 integer(c_size_t), intent(in) :: data_pointer_offset # endif type(prif_coarray_handle), intent(out) :: alias_handle end subroutine module subroutine prif_alias_destroy(alias_handle) implicit none type(prif_coarray_handle), intent(in) :: alias_handle end subroutine module subroutine prif_lcobound_with_dim(coarray_handle, dim, lcobound) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int), intent(in) :: dim integer(c_int64_t), intent(out) :: lcobound end subroutine module subroutine prif_lcobound_no_dim(coarray_handle, lcobounds) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(out) :: lcobounds(:) end subroutine module subroutine prif_ucobound_with_dim(coarray_handle, dim, ucobound) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int), intent(in) :: dim integer(c_int64_t), intent(out) :: ucobound end subroutine module subroutine prif_ucobound_no_dim(coarray_handle, ucobounds) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(out) :: ucobounds(:) end subroutine module subroutine prif_coshape(coarray_handle, sizes) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(out) :: sizes(:) end subroutine module subroutine prif_image_index(coarray_handle, sub, image_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) integer(c_int), intent(out) :: image_index end subroutine module subroutine prif_image_index_with_team(coarray_handle, sub, team, image_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) type(prif_team_type), intent(in) :: team integer(c_int), intent(out) :: image_index end subroutine module subroutine prif_image_index_with_team_number(coarray_handle, sub, team_number, image_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) integer(c_int64_t), intent(in) :: team_number integer(c_int), intent(out) :: image_index end subroutine module subroutine prif_initial_team_index(coarray_handle, sub, initial_team_index, stat) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) integer(c_int), intent(out) :: initial_team_index integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_initial_team_index_with_team(coarray_handle, sub, team, initial_team_index, stat) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) type(prif_team_type), intent(in) :: team integer(c_int), intent(out) :: initial_team_index integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_initial_team_index_with_team_number(coarray_handle, sub, team_number, initial_team_index, stat) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) integer(c_int64_t), intent(in) :: team_number integer(c_int), intent(out) :: initial_team_index integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_num_images(num_images) implicit none integer(c_int), intent(out) :: num_images end subroutine module subroutine prif_num_images_with_team(team, num_images) implicit none type(prif_team_type), intent(in) :: team integer(c_int), intent(out) :: num_images end subroutine module subroutine prif_num_images_with_team_number(team_number, num_images) implicit none integer(c_int64_t), intent(in) :: team_number integer(c_int), intent(out) :: num_images end subroutine module subroutine prif_this_image_no_coarray(team, this_image) implicit none type(prif_team_type), intent(in), optional :: team integer(c_int), intent(out) :: this_image end subroutine module subroutine prif_this_image_with_coarray(coarray_handle, team, cosubscripts) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle type(prif_team_type), intent(in), optional :: team integer(c_int64_t), intent(out) :: cosubscripts(:) end subroutine module subroutine prif_this_image_with_dim(coarray_handle, dim, team, cosubscript) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int), intent(in) :: dim type(prif_team_type), intent(in), optional :: team integer(c_int64_t), intent(out) :: cosubscript end subroutine module subroutine prif_failed_images(team, failed_images) implicit none type(prif_team_type), intent(in), optional :: team integer(c_int), allocatable, intent(out) :: failed_images(:) end subroutine module subroutine prif_stopped_images(team, stopped_images) implicit none type(prif_team_type), intent(in), optional :: team integer(c_int), allocatable, intent(out) :: stopped_images(:) end subroutine module impure elemental subroutine prif_image_status(image, team, image_status) implicit none integer(c_int), intent(in) :: image type(prif_team_type), intent(in), optional :: team integer(c_int), intent(out) :: image_status end subroutine module subroutine prif_local_data_pointer(coarray_handle, local_data) type(prif_coarray_handle), intent(in) :: coarray_handle type(c_ptr), intent(out) :: local_data end subroutine module subroutine prif_set_context_data(coarray_handle, context_data) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle type(c_ptr), intent(in) :: context_data end subroutine module subroutine prif_get_context_data(coarray_handle, context_data) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle type(c_ptr), intent(out) :: context_data end subroutine module subroutine prif_size_bytes(coarray_handle, data_size) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(out) :: data_size end subroutine module subroutine prif_co_sum(a, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_max(a, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_max_character(a, result_image, stat, errmsg, errmsg_alloc) implicit none character(len=*, kind=c_char), intent(inout), target :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_min(a, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_min_character(a, result_image, stat, errmsg, errmsg_alloc) implicit none character(len=*, kind=c_char), intent(inout), target :: a(..) integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_reduce(a, operation_wrapper, cdata, result_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target :: a(..) procedure(prif_operation_wrapper_interface), pointer, intent(in) :: operation_wrapper type(c_ptr), intent(in), value :: cdata integer(c_int), intent(in), optional :: result_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_co_broadcast(a, source_image, stat, errmsg, errmsg_alloc) implicit none type(*), intent(inout), target :: a(..) integer(c_int), intent(in) :: source_image integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_form_team(team_number, team, new_index, stat, errmsg, errmsg_alloc) implicit none integer(c_int64_t), intent(in) :: team_number type(prif_team_type), intent(out) :: team integer(c_int), intent(in), optional :: new_index integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_change_team(team, stat, errmsg, errmsg_alloc) implicit none type(prif_team_type), intent(in) :: team integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_end_team(stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_get_team(level, team) implicit none integer(c_int), intent(in), optional :: level type(prif_team_type), intent(out) :: team end subroutine module subroutine prif_team_number(team, team_number) implicit none type(prif_team_type), intent(in), optional :: team integer(c_int64_t), intent(out) :: team_number end subroutine module subroutine prif_sync_all(stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_sync_images(image_set, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in), optional :: image_set(:) integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_sync_team(team, stat, errmsg, errmsg_alloc) implicit none type(prif_team_type), intent(in) :: team integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_sync_memory(stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_lock(image_num, coarray_handle, offset, acquired_lock, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset logical(c_bool), intent(out), optional :: acquired_lock integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_lock_indirect(image_num, lock_var_ptr, acquired_lock, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: lock_var_ptr logical(c_bool), intent(out), optional :: acquired_lock integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_unlock(image_num, coarray_handle, offset, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_unlock_indirect(image_num, lock_var_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: lock_var_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_critical(critical_coarray, stat, errmsg, errmsg_alloc) implicit none type(prif_coarray_handle), intent(in) :: critical_coarray integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_end_critical(critical_coarray) implicit none type(prif_coarray_handle), intent(in) :: critical_coarray end subroutine module subroutine prif_event_post(image_num, coarray_handle, offset, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_event_post_indirect(image_num, event_var_ptr, stat, errmsg, errmsg_alloc) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: event_var_ptr integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_event_wait(event_var_ptr, until_count, stat, errmsg, errmsg_alloc) implicit none type(c_ptr), intent(in) :: event_var_ptr integer(c_int64_t), intent(in), optional :: until_count integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_event_query(event_var_ptr, count, stat) implicit none type(c_ptr), intent(in) :: event_var_ptr integer(c_int64_t), intent(out) :: count integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_notify_wait(notify_var_ptr, until_count, stat, errmsg, errmsg_alloc) implicit none type(c_ptr), intent(in) :: notify_var_ptr integer(c_int64_t), intent(in), optional :: until_count integer(c_int), intent(out), optional :: stat character(len=*), intent(inout), optional :: errmsg character(len=:), intent(inout), allocatable, optional :: errmsg_alloc end subroutine module subroutine prif_atomic_add(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_add_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_and(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_and_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_or(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_or_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_xor(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_xor_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_cas_int(image_num, coarray_handle, offset, old, compare, new, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(PRIF_ATOMIC_INT_KIND), intent(in) :: compare integer(PRIF_ATOMIC_INT_KIND), intent(in) :: new integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_cas_int_indirect(image_num, atom_remote_ptr, old, compare, new, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(PRIF_ATOMIC_INT_KIND), intent(in) :: compare integer(PRIF_ATOMIC_INT_KIND), intent(in) :: new integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_cas_logical(image_num, coarray_handle, offset, old, compare, new, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: old logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: compare logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: new integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_cas_logical_indirect(image_num, atom_remote_ptr, old, compare, new, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: old logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: compare logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: new integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_add(image_num, coarray_handle, offset, value, old, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_add_indirect(image_num, atom_remote_ptr, value, old, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_and(image_num, coarray_handle, offset, value, old, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_and_indirect(image_num, atom_remote_ptr, value, old, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_or(image_num, coarray_handle, offset, value, old, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_or_indirect(image_num, atom_remote_ptr, value, old, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_xor(image_num, coarray_handle, offset, value, old, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_fetch_xor_indirect(image_num, atom_remote_ptr, value, old, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(PRIF_ATOMIC_INT_KIND), intent(out) :: old integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_define_int(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_define_int_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_define_logical(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_define_logical_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_ref_int(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset integer(PRIF_ATOMIC_INT_KIND), intent(out) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_ref_int_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr integer(PRIF_ATOMIC_INT_KIND), intent(out) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_ref_logical(image_num, coarray_handle, offset, value, stat) implicit none integer(c_int), intent(in) :: image_num type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_size_t), intent(in) :: offset logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: value integer(c_int), intent(out), optional :: stat end subroutine module subroutine prif_atomic_ref_logical_indirect(image_num, atom_remote_ptr, value, stat) implicit none integer(c_int), intent(in) :: image_num integer(c_intptr_t), intent(in) :: atom_remote_ptr logical(PRIF_ATOMIC_LOGICAL_KIND), intent(out) :: value integer(c_int), intent(out), optional :: stat end subroutine end interface ! Type definitions only relevant to Caffeine internals type, private, bind(C) :: prif_coarray_descriptor private type(c_ptr) :: coarray_data integer(c_int) :: corank integer(c_size_t) :: coarray_size type(c_funptr) :: final_func type(c_ptr) :: previous_handle = c_null_ptr, next_handle = c_null_ptr integer(c_int64_t) :: lcobounds(15), ucobounds(14) integer(c_int) :: coshape_epp(15) type(c_ptr) :: p_context_data type(c_ptr) :: reserved end type type, private :: prif_team_descriptor type(c_ptr) :: gex_team type(c_ptr) :: heap_mspace integer(c_intptr_t) :: heap_start integer(c_size_t) :: heap_size integer(c_int64_t) :: team_number integer(c_int) :: this_image, num_images type(prif_team_descriptor), pointer :: parent_team => null() type(prif_coarray_descriptor), pointer :: coarrays => null() type(child_team_info), pointer :: child_heap_info => null() end type type :: child_team_info type(c_ptr) :: allocated_memory integer(c_ptrdiff_t) :: offset integer(c_size_t) :: size end type end module prif fortran-caffeine-0.7.2/LICENSE.txt0000664000175000017500000001746615162221361016755 0ustar alastairalastair************************************* *** Caffeine Terms and Conditions *** ************************************* All files in this directory and all sub-directories (except where otherwise noted) are subject to the following copyright and licensing terms: *** Copyright Notice *** Caffeine Copyright (c) 2021-2026, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy), Archaeologic Inc., and Harris Snyder. All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Intellectual Property Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit others to do so. *** License Agreement *** Caffeine Copyright (c) 2021-2026, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy), Archaeologic Inc., and Harris Snyder. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy, Archaeologic Inc., Harris Snyder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. *** Subsidiary Packages *** The Caffeine package downloads and installs GASNet-EX, which is distributed subject to the following license. Some Caffeine source files are also based in part on GASNet-EX source files. For more details on GASNet licensing, visit https://gasnet.lbl.gov * --------------------------------------------------------------------------- Global-Address Space Networking for Exascale (GASNet-EX) Copyright (c) 2000-2026, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. * --------------------------------------------------------------------------- The Caffeine package downloads and installs FPM (Fortran Package Manager), which is distributed subject to the following license. For more details on FPM licensing, visit https://github.com/fortran-lang/fpm * --------------------------------------------------------------------------- Copyright (c) 2020 fpm contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * --------------------------------------------------------------------------- fortran-caffeine-0.7.2/test/0000775000175000017500000000000015162221361016073 5ustar alastairalastairfortran-caffeine-0.7.2/test/prif_co_broadcast_test.F900000664000175000017500000000511315162221361023055 0ustar alastairalastairmodule prif_co_broadcast_test_m use prif, only : prif_co_broadcast, prif_num_images, prif_this_image_no_coarray use julienne_m, only : & usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t & ,operator(//) & ,operator(.expect.) & ,operator(.equalsExpected.) implicit none private public :: prif_co_broadcast_test_t type, extends(test_t) :: prif_co_broadcast_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type type object_t integer i logical fallacy character(len=len("fooey")) actor complex issues end type interface operator(==) module procedure equals end interface contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_co_broadcast" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_co_broadcast_test_t) prif_co_broadcast_test allocate(test_results, source = prif_co_broadcast_test%run([ & test_description_t("broadcasting a default integer scalar with no optional arguments present", usher(broadcast_default_integer_scalar)) & ,test_description_t("broadcasting a derived type scalar with no allocatable components", usher(broadcast_derived_type)) & ])) end function logical pure function equals(lhs, rhs) type(object_t), intent(in) :: lhs, rhs equals = all([ & lhs%i == rhs%i & ,lhs%fallacy .eqv. rhs%fallacy & ,lhs%actor == rhs%actor & ,lhs%issues == rhs%issues & ]) end function function broadcast_default_integer_scalar() result(diag) type(test_diagnosis_t) :: diag integer iPhone, me integer, parameter :: source_value = 7779311, junk = -99 call prif_this_image_no_coarray(this_image=me) iPhone = merge(source_value, junk, me==1) call prif_co_broadcast(iPhone, source_image=1) diag = iPhone .equalsExpected. source_value end function function broadcast_derived_type() result(diag) type(test_diagnosis_t) :: diag type(object_t) object integer me, ni call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=ni) object = object_t(me, .false., "gooey", me*(1.,0.)) call prif_co_broadcast(object, source_image=ni) associate(expected_object => object_t(ni, .false., "gooey", ni*(1.,0.))) diag = .expect. (object == expected_object) // "co_broadcast derived type" end associate end function end module prif_co_broadcast_test_m fortran-caffeine-0.7.2/test/prif_teams_test.F900000664000175000017500000002626615162221361021557 0ustar alastairalastair#include "test-utils.F90" #include "language-support.F90" module prif_teams_test_m # include "test-uses-alloc.F90" use prif use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.also.), operator(.isAtLeast.), operator(.isAtMost.), operator(.equalsExpected.), operator(//) implicit none private public :: prif_teams_test_t type, extends(test_t) :: prif_teams_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type integer :: cleanup_count = 0 contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF Teams" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_teams_test_t) prif_teams_test allocate(test_results, source = prif_teams_test%run([ & test_description_t("creating, changing to, and allocating coarrays", usher(check_teams)) & ])) end function function check_teams() result(diag) type(test_diagnosis_t) :: diag integer :: dummy_element, i integer(c_int) :: initial_num_imgs, num_imgs, me, me_child, x integer(c_size_t) :: element_size integer(c_int64_t) :: which_team, n integer, parameter :: num_coarrays = 10 type(prif_coarray_handle) :: initial_coarray, coarrays(num_coarrays) type(c_ptr) :: allocated_memory type(prif_team_type) :: team, initial_team, t, team_self diag = .true. call prif_num_images(num_images=initial_num_imgs) ALSO2(initial_num_imgs .isAtLeast. 1, "invalid prif_num_images") call prif_this_image_no_coarray(this_image=me) ALSO2(me .isAtLeast. 1, "invalid prif_this_image") ALSO2(me .isAtMost. initial_num_imgs, "invalid prif_this_image") n = 0 ! clear outputs call prif_team_number(team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "Initial team number is -1") n = 0 ! clear outputs call prif_get_team(team=initial_team) call prif_team_number(team=initial_team, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team retrieval of current initial team") ! ensure prif_sync_team is usable call prif_sync_team(team=initial_team) x = 0 ! clear outputs call prif_num_images_with_team(team=initial_team, num_images=x) ALSO2(x .equalsExpected. initial_num_imgs, "prif_num_images in initial team") x = 0 ! clear outputs call prif_num_images_with_team_number(team_number=-1_c_int64_t, num_images=x) ALSO2(x .equalsExpected. initial_num_imgs, "prif_num_images_with_team_number in initial team") x = 0 ! clear outputs call prif_this_image_no_coarray(team=initial_team, this_image=x) ALSO2(x .equalsExpected. me, "prif_this_image_no_coarray in initial team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_INITIAL_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team(PRIF_INITIAL_TEAM) retrieves initial team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_CURRENT_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team(PRIF_CURRENT_TEAM) retrieval of initial team when current team is initial team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_PARENT_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team(PRIF_PARENT_TEAM) retrieves initial team when parent team is initial team") element_size = int(storage_size(dummy_element)/8, c_size_t) call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = element_size, & final_func = c_null_funptr, & coarray_handle = initial_coarray, & allocated_memory = allocated_memory) n = 0 ! clear outputs call prif_ucobound_with_dim(initial_coarray, 1, n) ALSO(n .equalsExpected. int(initial_num_imgs, c_int64_t)) which_team = merge(1_c_int64_t, 2_c_int64_t, mod(me, 2) == 0) call prif_form_team(team_number = which_team, team = team) call prif_change_team(team) call prif_num_images(num_images=num_imgs) ALSO2(num_imgs .equalsExpected. initial_num_imgs/2 + mod(initial_num_imgs,2)*(int(which_team)-1), "Team has correct number of images") ! ensure prif_sync_team is usable call prif_sync_team(team=team) call prif_sync_team(team=initial_team) x = 0 ! clear outputs call prif_num_images_with_team(team=team, num_images=x) ALSO2(x .equalsExpected. num_imgs, "prif_num_images works with team") x = 0 ! clear outputs call prif_num_images_with_team_number(team_number=which_team, num_images=x) ALSO2 (x .equalsExpected. num_imgs, "prif_num_images_with_team_number works with current team") call prif_this_image_no_coarray(this_image=me_child) ALSO2(me_child .equalsExpected. (me - 1)/2 + 1, "prif_this_image is valid") x = 0 ! clear outputs call prif_this_image_no_coarray(team=team, this_image=x) ALSO2(x .equalsExpected. me_child, "prif_this_image is valid") n = 0 ! clear outputs call prif_team_number(team_number=n) ALSO2(n .equalsExpected. which_team, "Correct current team number") n = 0 ! clear outputs call prif_team_number(team=team, team_number=n) ALSO2(n .equalsExpected. which_team, "Correct current team number") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. which_team, "prif_get_team retrieves current team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_INITIAL_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team(PRIF_INITIAL_TEAM) retrieves initial team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_CURRENT_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. which_team, "prif_get_team(PRIF_CURRENT_TEAM) retrieves current team") t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(level=PRIF_PARENT_TEAM, team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_get_team(PRIF_PARENT_TEAM) retrieves initial team when parent team is initial team") x = 0 ! clear outputs call prif_num_images_with_team(team=initial_team, num_images=x) ALSO2(x .equalsExpected. initial_num_imgs, "prif_num_images works with initial team") x = 0 ! clear outputs call prif_num_images_with_team_number(team_number=-1_c_int64_t, num_images=x) ALSO2(x .equalsExpected. initial_num_imgs, "prif_num_images_with_team_number works with initial team") x = 0 ! clear outputs call prif_this_image_no_coarray(team=initial_team, this_image=x) ALSO2(x .equalsExpected. me, "prif_this_image_no_coarray works with initial team") n = 0 ! clear outputs call prif_ucobound_with_dim(initial_coarray, 1, n) ALSO(n .equalsExpected. int(num_imgs, c_int64_t)) ALSO(cleanup_count .equalsExpected. 0) do i = 1, num_coarrays call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = element_size, & #if HAVE_FINAL_FUNC_SUPPORT final_func = c_funloc(coarray_cleanup), & # define CHECK_COUNT(n) ALSO(cleanup_count .equalsExpected. n) #else final_func = c_null_funptr, & # define CHECK_COUNT(n) #endif coarray_handle = coarrays(i), & allocated_memory = allocated_memory) end do CHECK_COUNT(0) call prif_deallocate_coarrays(coarrays(4:4)) call prif_deallocate_coarrays(coarrays(2:2)) call prif_deallocate_coarray(coarrays(7)) CHECK_COUNT(3) call prif_form_team(team_number = int(me,c_int64_t), team = team_self) x = 0 ! clear outputs call prif_num_images_with_team(team=team_self, num_images=x) ALSO2(x .equalsExpected. 1, "prif_num_images works with team_self") call prif_change_team(team_self) x = 0 ! clear outputs call prif_num_images(num_images=x) ALSO2(x .equalsExpected. 1, "prif_num_images works in team_self") x = 0 ! clear outputs call prif_this_image_no_coarray(this_image=x) ALSO2(x .equalsExpected. 1, "prif_this_image is valid in team_self") x = 0 ! clear outputs call prif_this_image_no_coarray(team=team, this_image=x) ALSO2(x .equalsExpected. me_child, "prif_this_image is valid") x = 0 ! clear outputs call prif_this_image_no_coarray(team=initial_team, this_image=x) ALSO2(x .equalsExpected. me, "prif_this_image is valid") ! ensure prif_sync_team is usable call prif_sync_team(team=team) call prif_sync_team(team=initial_team) call prif_sync_team(team=team_self) n = 0 ! clear outputs call prif_ucobound_with_dim(initial_coarray, 1, n) ALSO(n .equalsExpected. 1_c_int64_t) n = 0 ! clear outputs call prif_ucobound_with_dim(coarrays(3), 1, n) ALSO(n .equalsExpected. 1_c_int64_t) CHECK_COUNT(3) call prif_end_team() CHECK_COUNT(3) call prif_end_team() CHECK_COUNT(num_coarrays) ! ensure prif_sync_team is usable call prif_sync_team(team=team) call prif_sync_team(team=initial_team) t = prif_team_type() ; n = 0 ! clear outputs call prif_get_team(team=t) call prif_team_number(team=t, team_number=n) ALSO2(n .equalsExpected. -1_c_int64_t, "prif_end_team restores initial team") end function #if HAVE_FINAL_FUNC_SUPPORT subroutine coarray_cleanup(handle, stat, errmsg) bind(C) type(prif_coarray_handle), pointer, intent(in) :: handle integer(c_int), intent(out) :: stat character(len=:), intent(out), allocatable :: errmsg cleanup_count = cleanup_count + 1 stat = 0 end subroutine #endif end module prif_teams_test_m fortran-caffeine-0.7.2/test/prif_event_test.F900000664000175000017500000003073015162221361021556 0ustar alastairalastair#include "test-utils.F90" #include "assert_macros.h" ! TEST_ASSERT activates immediate assertions in test code #if !TEST_ASSERT #undef call_assert #define call_assert(c) #undef call_assert_describe #define call_assert_describe(c,d) #endif module prif_event_test_m # include "test-uses-alloc.F90" use assert_m use prif, only: & prif_event_type, prif_event_post, prif_event_post_indirect, prif_event_wait, prif_event_query, & prif_notify_type, prif_notify_wait, prif_put_with_notify, prif_put_strided_with_notify, & prif_num_images, & prif_put, & prif_sync_all, & prif_this_image_no_coarray use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.all.), operator(.also.), operator(.equalsExpected.), operator(//) implicit none private public :: prif_event_test_t type, extends(test_t) :: prif_event_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF Events" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_event_test_t) prif_event_test allocate(test_results, source = prif_event_test%run([ & test_description_t("a serial event test", usher(check_event_serial)) & ,test_description_t("a parallel hot-spot event test", usher(check_event_parallel)) & ,test_description_t("a parallel hot-spot notify test", usher(check_notify)) & ])) end function function test_rand(lo, hi) result(result_) integer :: lo, hi, result_ real :: r call random_number(r) ! Generate a uniform random number in [0, 1) result_ = int(r * (hi - lo + 1)) + lo call_assert(result_ >= lo .and. result_ <= hi) end function function check_event_serial() result(diag) type(test_diagnosis_t) diag integer :: me type(prif_event_type) :: dummy_event integer(c_size_t) :: sizeof_event type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory type(prif_event_type), pointer :: local_event integer(c_intptr_t) :: base_addr diag = .true. call RANDOM_INIT(REPEATABLE=.true., IMAGE_DISTINCT=.true.) sizeof_event = int(storage_size(dummy_event)/8, c_size_t) call prif_this_image_no_coarray(this_image=me) ! type(event_type) :: evt[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = sizeof_event, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_event) base_addr = transfer(allocated_memory, base_addr) local_event = dummy_event ! default initialize block integer, parameter :: lim = 10 integer i, j, c, r integer(c_int64_t) :: count, expect character(len=50) :: context expect = 0 do i=1, lim call prif_event_query(c_loc(local_event), count) ALSO2(count .equalsExpected. expect, "event count at top of loop") call_assert(expect == count) do j=1,i call prif_event_post(me, coarray_handle, 0_c_size_t) expect = expect + 1 call prif_event_query(c_loc(local_event), count) ALSO2(count .equalsExpected. expect, "after event_post") call_assert(expect == count) call prif_event_post_indirect(me, base_addr) expect = expect + 1 call prif_event_query(c_loc(local_event), count) ALSO2(count .equalsExpected. expect, "event count after event_post_indirect") call_assert(expect == count) if (expect >= 1) then c = test_rand(1, int(expect)) if (c > 1) then context = "after event_wait(c)" call prif_event_wait(c_loc(local_event), int(c,c_int64_t)) else if (test_rand(0,1) == 0) then call_assert(c == 1) context = "after event_wait(1)" call prif_event_wait(c_loc(local_event), 1_c_int64_t) else if (test_rand(0,1) == 0) then call_assert(c == 1) context = "after event_wait()" call prif_event_wait(c_loc(local_event)) else call_assert(c == 1) context = "after event_wait(r)" r = test_rand(-50, 0) call prif_event_wait(c_loc(local_event), int(r,c_int64_t)) endif expect = expect - c call prif_event_query(c_loc(local_event), count) ALSO2(count .equalsExpected. expect, context) call_assert_describe(expect == count, context) end if end do end do end block call prif_deallocate_coarray(coarray_handle) end function function check_event_parallel() result(diag) type(test_diagnosis_t) :: diag integer :: me, num_imgs type(prif_event_type) :: dummy_event integer(c_size_t) :: sizeof_event, sizeof_int type(prif_coarray_handle) :: coarray_handle_evt type(prif_coarray_handle) :: coarray_handle_ctr type(c_ptr) :: allocated_memory type(prif_event_type), pointer :: local_evt integer, pointer :: local_ctr(:) diag = .true. sizeof_event = int(storage_size(dummy_event)/8, c_size_t) sizeof_int = c_sizeof(me) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) ! type(event_type) :: evt[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = sizeof_event, & final_func = c_null_funptr, & coarray_handle = coarray_handle_evt, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_evt) local_evt = dummy_event ! default initialize ! integer :: ctr(num_images())[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = num_imgs * sizeof_int, & final_func = c_null_funptr, & coarray_handle = coarray_handle_ctr, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_ctr, [num_imgs]) local_ctr = 0 ! initialize call prif_sync_all block integer, parameter :: lim = 10 integer, target :: i, j do i=1, lim ! every image writes a coarray value on image 1, then posts event ! ctr(me)[1] = i call prif_put( & image_num = 1, & coarray_handle = coarray_handle_ctr, & offset = (me-1) * sizeof_int, & current_image_buffer = c_loc(i), & size_in_bytes = sizeof_int) ! EVENT POST ( evt[1] ) call prif_event_post(1, coarray_handle_evt, 0_c_size_t) if (me == 1) then ! image 1 waits on the events, then validates data arrival ! EVENT WAIT ( evt, UNTIL_COUNT=num_imgs ) call prif_event_wait(c_loc(local_evt), int(num_imgs,c_int64_t)) ! validate ctr(:)[1] == i ALSO2(.all. (local_ctr(1:num_imgs) .equalsExpected. i), "gather result") ! image 1 writes back a coarray value to each image, then posts an event do j=1,num_imgs ! ctr(1)[j] = i call prif_put( & image_num = j, & coarray_handle = coarray_handle_ctr, & offset = 0_c_size_t, & current_image_buffer = c_loc(i), & size_in_bytes = sizeof_int) ! EVENT POST ( evt[j] ) call prif_event_post(j, coarray_handle_evt, 0_c_size_t) end do end if ! EVENT WAIT ( evt ) call prif_event_wait(c_loc(local_evt)) ! validate ctr(1)[me] == i ALSO2(local_ctr(1) .equalsExpected. i, "scatter result") end do end block call prif_deallocate_coarrays(([coarray_handle_ctr, coarray_handle_evt])) end function function check_notify() result(diag) type(test_diagnosis_t) diag integer :: me, num_imgs type(prif_notify_type) :: dummy_notify integer(c_size_t) :: sizeof_notify, sizeof_int type(prif_coarray_handle) :: coarray_handle_evt type(prif_coarray_handle) :: coarray_handle_ctr type(c_ptr) :: allocated_memory type(prif_notify_type), pointer :: local_evt integer, pointer :: local_ctr(:) diag = .true. sizeof_notify = int(storage_size(dummy_notify)/8, c_size_t) sizeof_int = c_sizeof(me) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) ! type(notify_type) :: evt[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = sizeof_notify, & final_func = c_null_funptr, & coarray_handle = coarray_handle_evt, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_evt) local_evt = dummy_notify ! default initialize ! integer :: ctr(num_images())[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = num_imgs * sizeof_int, & final_func = c_null_funptr, & coarray_handle = coarray_handle_ctr, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_ctr, [num_imgs]) local_ctr = 0 ! initialize call prif_sync_all block integer, parameter :: lim = 10 integer, target :: i, j do i=1, lim ! every image writes a coarray value on image 1 with notify ! ctr(me)[1,notify=evt] = i call prif_put_with_notify( & image_num = 1, & coarray_handle = coarray_handle_ctr, & offset = (me-1) * sizeof_int, & current_image_buffer = c_loc(i), & size_in_bytes = sizeof_int, & notify_coarray_handle = coarray_handle_evt, & notify_offset = 0_c_size_t) if (me == 1) then ! image 1 waits on the notifys, then validates data arrival ! NOTIFY WAIT ( evt, UNTIL_COUNT=num_imgs ) call prif_notify_wait(c_loc(local_evt), int(num_imgs,c_int64_t)) ! validate ctr(:)[1] == i ALSO2(.all. (local_ctr(1:num_imgs) .equalsExpected. i), "gather result") ! image 1 writes back a coarray value to each image with notify do j=1,num_imgs ! ctr(1)[j, notify=evt] = i call prif_put_strided_with_notify( & image_num = j, & coarray_handle = coarray_handle_ctr, & offset = 0_c_size_t, & remote_stride = [sizeof_int], & current_image_buffer = c_loc(i), & current_image_stride = [sizeof_int], & element_size = sizeof_int, & extent = [1_c_size_t], & notify_coarray_handle = coarray_handle_evt, & notify_offset = 0_c_size_t) end do end if ! NOTIFY WAIT ( evt ) call prif_notify_wait(c_loc(local_evt)) ! validate ctr(1)[me] == i ALSO2(local_ctr(1) .equalsExpected. i, "scatter result") end do end block call prif_deallocate_coarrays(([coarray_handle_ctr, coarray_handle_evt])) end function end module prif_event_test_m fortran-caffeine-0.7.2/test/prif_allocate_test.F900000664000175000017500000003155415162221361022226 0ustar alastairalastair#include "test-utils.F90" #include "language-support.F90" module prif_allocate_test_m # include "test-uses-alloc.F90" use prif, only : & prif_num_images, prif_size_bytes, & prif_set_context_data, prif_get_context_data, prif_local_data_pointer, & prif_alias_create, prif_alias_destroy, prif_this_image_no_coarray, & PRIF_STAT_OUT_OF_MEMORY use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.all.), operator(.also.), operator(.equalsExpected.), operator(.isAtLeast.), operator(//) implicit none private public :: prif_allocate_test_t type, extends(test_t) :: prif_allocate_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type #if HAVE_FINAL_FUNC_SUPPORT ! Global state used to coordinate with finalizers integer :: ff_count type(prif_coarray_handle) :: ff_handle type(test_diagnosis_t) :: ff_diag logical :: ff_force_fail = .false. character(len=*), parameter :: ff_err = "test error message" #endif contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF Allocation" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_allocate_test_t) prif_allocate_test allocate(test_results, source = prif_allocate_test%run([ & test_description_t("allocating, using and deallocating an integer scalar coarray with a corank of 1", & usher(check_allocate_integer_scalar_coarray_with_corank1)) & ,test_description_t("allocating, using and deallocating an integer array coarray with a corank of 2", & usher(check_allocate_integer_array_coarray_with_corank2)) & ,test_description_t("allocating, using and deallocating memory non-symmetrically", & usher(check_allocate_non_symmetric)) & ,test_description_t("allocating and deallocating coarrays with finalizers" & # if HAVE_FINAL_FUNC_SUPPORT , usher(check_final_func) & # endif ) & ,test_description_t("reporting out-of-memory errors", & usher(check_allocation_oom)) & ])) end function function check_allocate_integer_scalar_coarray_with_corank1() result(diag) type(test_diagnosis_t) diag ! Allocate memory for an integer scalar single corank coarray, such as the following decl ! integer :: coarr[*] integer :: dummy_element type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, pointer :: local_slice integer(c_size_t) :: data_size, query_size diag = .true. allocated_memory = c_null_ptr local_slice => null() ALSO(.not. associated(local_slice)) data_size = storage_size(dummy_element)/8 call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t) :: ], data_size, c_null_funptr, & coarray_handle, allocated_memory) call c_f_pointer(allocated_memory, local_slice) ALSO(associated(local_slice)) local_slice = 42 ALSO(local_slice .equalsExpected. 42) call prif_size_bytes(coarray_handle, data_size=query_size) ALSO2(query_size .equalsExpected. data_size, "invalid prif_size_bytes") block ! Check prif_{set,get}_context_data integer, target :: dummy(10), i type(c_ptr) :: expect, actual do i = 1,10 expect = c_loc(dummy(i)) actual = c_null_ptr call prif_set_context_data(coarray_handle, expect) call prif_get_context_data(coarray_handle, actual) ALSO2(actual .equalsExpected. expect, "prif_{set,get}_context_data are not working") end do end block call prif_deallocate_coarray(coarray_handle) end function #if HAVE_FINAL_FUNC_SUPPORT function check_final_func() result(retdiag) type(test_diagnosis_t) retdiag ! this function shares several global vars with finalizers, see ff_* above ! globalize diag for ALSO: # define diag ff_diag integer :: num_imgs, me, dummy_element type(c_ptr) :: allocated_memory integer, pointer :: local_slice integer(c_size_t) :: data_size, query_size integer(c_int) :: stat character(len=len(ff_err)) :: errmsg character(len=:), allocatable :: errmsg_alloc diag = .true. call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) data_size = storage_size(dummy_element)/8 ! simple final_func case ff_count = 0 call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t) :: ], & data_size, c_funloc(coarray_cleanup_simple), & ff_handle, allocated_memory) ALSO(ff_count .equalsExpected. 0) call prif_deallocate_coarray(ff_handle) ALSO(ff_count .equalsExpected. 1) ! final_func that errors on first three deallocations ff_count = 0 call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t) :: ], & data_size, c_funloc(coarray_cleanup_first_error), & ff_handle, allocated_memory) ALSO(ff_count .equalsExpected. 0) call prif_deallocate_coarray3(ff_handle, stat, errmsg=errmsg) ALSO(ff_count .equalsExpected. 1) ALSO(stat .equalsExpected. 10) ALSO(errmsg .equalsExpected. ff_err) call prif_deallocate_coarrays3([ff_handle], stat, errmsg_alloc=errmsg_alloc) ALSO(ff_count .equalsExpected. 2) ALSO(stat .equalsExpected. 20) ALSO(errmsg_alloc .equalsExpected. ff_err) deallocate(errmsg_alloc) if (me == num_imgs) then ! test non-single-valued failure ff_force_fail = .true. end if call prif_deallocate_coarray3(ff_handle, stat, errmsg_alloc=errmsg_alloc) ALSO(ff_count .equalsExpected. 3) ALSO(stat .equalsExpected. 30) ALSO(errmsg_alloc .equalsExpected. ff_err) deallocate(errmsg_alloc) ff_force_fail = .false. call prif_deallocate_coarray3(ff_handle, stat, errmsg_alloc=errmsg_alloc) ALSO(ff_count .equalsExpected. 4) ALSO(stat .equalsExpected. 0) ALSO(.not. allocated(errmsg_alloc)) retdiag = diag end function subroutine coarray_cleanup_simple(handle , stat, errmsg) bind(C) type(prif_coarray_handle), pointer , intent(in) :: handle integer(c_int), intent(out) :: stat character(len=:), intent(out), allocatable :: errmsg ALSO(assert_aliased(handle, ff_handle, 0)) ff_count = ff_count + 1 stat = 0 end subroutine subroutine coarray_cleanup_first_error(handle , stat, errmsg) bind(C) type(prif_coarray_handle), pointer , intent(in) :: handle integer(c_int), intent(out) :: stat character(len=:), intent(out), allocatable :: errmsg ALSO(assert_aliased(handle, ff_handle, 0)) ff_count = ff_count + 1 errmsg = ff_err if (ff_count <= 2 .or. ff_force_fail) then stat = 10 * ff_count else stat = 0 end if end subroutine # undef diag #endif function check_allocate_non_symmetric() result(diag) type(test_diagnosis_t) diag type(c_ptr) :: allocated_memory integer(c_int), pointer :: local_slice call prif_allocate(sizeof(local_slice), allocated_memory) call c_f_pointer(allocated_memory, local_slice) local_slice = 42 diag = local_slice .equalsExpected. 42 call prif_deallocate(c_loc(local_slice)) end function ! returns (p + off) pure function c_ptr_add(p, off) type(c_ptr), intent(in) :: p integer(c_size_t), intent(in) :: off type(c_ptr) :: c_ptr_add integer(c_intptr_t) :: tmp tmp = transfer(p, tmp) tmp = tmp + off c_ptr_add = transfer(tmp, c_ptr_add) end function function assert_aliased(h1, h2, offset) result(diag) type(test_diagnosis_t) :: diag type(prif_coarray_handle) :: h1, h2 integer(c_size_t), optional :: offset integer(c_size_t) :: offset_ type(c_ptr) :: p1, p2 integer(c_size_t) :: s1, s2 type(c_ptr) :: c1, c2, cx integer, save, target :: dummy(10) integer, save :: di = 1 diag = .true. if (present(offset)) then offset_ = offset else offset_ = 0 endif call prif_local_data_pointer(h1, p1) call prif_local_data_pointer(h2, p2) ALSO(p2 .equalsExpected. c_ptr_add(p1, offset_)) ! As of PRIF 0.6. prif_size_bytes is unspecified for aliases, ! so this particular check is specific to the current Caffeine implementation call prif_size_bytes(h1, s1) call prif_size_bytes(h2, s2) ALSO(s2 .equalsExpected. s1) cx = c_loc(dummy(di)) di = mod(di,size(dummy)) + 1 call prif_set_context_data(h1, cx) call prif_get_context_data(h1, c1) ALSO(c1 .equalsExpected. cx) call prif_get_context_data(h2, c2) ALSO(c2 .equalsExpected. cx) call prif_set_context_data(h2, c_null_ptr) call prif_get_context_data(h1, c1) ALSO(.not. c_associated(c1)) end function function check_allocate_integer_array_coarray_with_corank2() result(diag) type(test_diagnosis_t) :: diag ! Allocate memory for an integer scalar single corank coarray, such as the following decl ! integer :: coarr(10)[4,*] integer :: dummy_element, num_imgs, i type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, pointer :: local_slice(:) integer(c_size_t) :: data_size, query_size diag = .true. call prif_num_images(num_images=num_imgs) allocated_memory = c_null_ptr local_slice => null() ALSO(.not.associated(local_slice)) data_size = 10*storage_size(dummy_element)/8 call prif_allocate_coarray( & [integer(c_int64_t) :: 1,1], [integer(c_int64_t) :: 4], data_size, c_null_funptr, & coarray_handle, allocated_memory) call prif_size_bytes(coarray_handle, data_size=query_size) ALSO2(query_size .equalsExpected. data_size, "invalid prif_size_bytes") call c_f_pointer(allocated_memory, local_slice, [10]) ALSO(associated(local_slice)) local_slice = [(i*i, i = 1, 10)] ALSO(.all. (local_slice .equalsExpected. [(i*i, i = 1, 10)])) block ! Check prif_{set,get}_context_data integer, target :: dummy(10), i type(c_ptr) :: expect, actual do i = 1,10 expect = c_loc(dummy(i)) actual = c_null_ptr call prif_set_context_data(coarray_handle, expect) call prif_get_context_data(coarray_handle, actual) ALSO2(actual .equalsExpected. expect, "prif_{set,get}_context_data not working") end do end block block ! check aliasing creation # if CAF_PRIF_VERSION <= 5 # define data_pointer_offset # else # define data_pointer_offset 0_c_size_t, # endif integer i, j integer, parameter :: lim = 10 type(prif_coarray_handle) :: a(lim) a(1) = coarray_handle do i=2, lim call prif_alias_create(a(i-1), [integer(c_int64_t) :: i-5], [integer(c_int64_t) :: i-5 + num_imgs], & data_pointer_offset a(i)) ALSO(assert_aliased(a(i-1), a(i))) do j = i+1,lim call prif_alias_create(a(i), [integer(c_int64_t) :: i, j-5], [integer(c_int64_t) :: j], & data_pointer_offset a(j)) ALSO(assert_aliased(a(i), a(j))) ALSO(assert_aliased(a(j), coarray_handle)) end do # if CAF_PRIF_VERSION >= 6 ! test PRIF 0.6 data_pointer_offset block type(prif_coarray_handle) :: b integer(c_size_t) :: off off = i call prif_alias_create(a(i), [integer(c_int64_t) :: i], [integer(c_int64_t) :: ], & off, b) ALSO(assert_aliased(a(i), b, off)) call prif_alias_destroy(b) end block # endif do j = i+1,lim call prif_alias_destroy(a(j)) end do end do do i=2, lim call prif_alias_destroy(a(i)) end do end block call prif_deallocate_coarray(coarray_handle) end function function check_allocation_oom() result(diag) type(test_diagnosis_t) diag integer(c_size_t) :: size_in_bytes type(c_ptr) :: allocated_memory integer(c_int) :: stat character(len=:), allocatable :: errmsg type(prif_coarray_handle) :: coarray_handle diag = .true. size_in_bytes = ishft(500_c_size_t, 40) ! 500TB call prif_allocate(size_in_bytes, allocated_memory, stat, errmsg_alloc=errmsg) ALSO(stat .equalsExpected. PRIF_STAT_OUT_OF_MEMORY) ALSO(allocated(errmsg)) if (allocated(errmsg)) then ALSO(len(errmsg) > 1) ALSO(index(errmsg, 'out of memory') .isAtLeast. 1) end if deallocate(errmsg) call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t) :: ], size_in_bytes, c_null_funptr, & coarray_handle, allocated_memory, stat, errmsg_alloc=errmsg) ALSO(stat .equalsExpected. PRIF_STAT_OUT_OF_MEMORY) ALSO(allocated(errmsg)) if (allocated(errmsg)) then ALSO(len(errmsg) > 1) ALSO(index(errmsg, 'out of memory') .isAtLeast. 1) end if deallocate(errmsg) end function end module prif_allocate_test_m fortran-caffeine-0.7.2/test/prif_stop_test.F900000664000175000017500000001033615162221361021422 0ustar alastairalastairmodule prif_stop_test_m use prif, only: prif_this_image_no_coarray, prif_sync_all use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, usher & ,operator(.equalsExpected.), operator(//) use unit_test_parameters_m, only : expected_stop_code, & image_one => subjob_setup, cmd_prefix => subjob_prefix, fpm_driver implicit none private public :: prif_stop_test_t type, extends(test_t) :: prif_stop_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type integer, parameter :: max_message_len = 128 contains pure function subject() character(len=:), allocatable :: subject subject = "prif_stop" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_stop_test_t) prif_stop_test allocate(test_results, source = prif_stop_test%run([ & test_description_t("delivering a zero exitstat when the stop code is omitted", usher(exit_with_no_stop_code)) & ,test_description_t("printing an integer stop code and delivering the non-zero exitstat", usher(exit_with_integer_stop_code)) & ,test_description_t("printing a character stop code and delivering a zero exitstat", usher(exit_with_character_stop_code)) & ,test_description_t("invoking a registered callback", usher(check_callback_invocation)) & ])) end function function exit_with_no_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status, cmd_stat character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_no_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example stop_with_no_code > /dev/null 2>&1", & wait = .true., & exitstat = exit_status, & cmdstat = cmd_stat, & cmdmsg = command_message & ) diag = (exit_status .equalsExpected. 0) // command_message else diag = .true. end if end function function exit_with_integer_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status, cmd_stat character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_integer_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example stop_with_integer_code > /dev/null 2>&1", & wait = .true., & exitstat = exit_status, & cmdstat = cmd_stat, & cmdmsg = command_message & ) diag = (exit_status .equalsExpected. expected_stop_code) // command_message else diag = .true. end if end function function exit_with_character_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status, cmd_stat character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_character_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example stop_with_character_code > /dev/null 2>&1", & wait = .true., & exitstat = exit_status, & cmdstat = cmd_stat, & cmdmsg = command_message & ) ! the standard recommends zero exit status for character stop codes diag = (exit_status .equalsExpected. 0) // command_message else diag = .true. end if end function function check_callback_invocation() result(diag) type(test_diagnosis_t) :: diag integer exit_status, cmd_stat character(len=max_message_len) command_message if (image_one()) then call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example register_stop_callback > /dev/null 2>&1", & wait = .true., & exitstat = exit_status, & cmdstat = cmd_stat, & cmdmsg = command_message & ) diag = (exit_status .equalsExpected. 0) // command_message else diag = .true. end if end function end module prif_stop_test_m fortran-caffeine-0.7.2/test/prif_sync_images_test.F900000664000175000017500000000605215162221361022736 0ustar alastairalastairmodule prif_sync_images_test_m use iso_c_binding, only: c_int use prif, only : prif_sync_images, prif_this_image_no_coarray, prif_num_images, prif_sync_all use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, usher implicit none private public :: prif_sync_images_test_t type, extends(test_t) :: prif_sync_images_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type integer, parameter :: lim = 10 contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_sync_images" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_sync_images_test_t) prif_sync_images_test allocate(test_results, source = prif_sync_images_test%run([ & test_description_t("synchronizing an image with itself", usher(check_serial)), & test_description_t("synchronizing with a neighbor", usher(check_neighbor)), & test_description_t("synchronizing every image with one image", usher(check_hot)) & ])) end function function check_serial() result(diag) type(test_diagnosis_t) :: diag integer(c_int) :: me integer i call prif_this_image_no_coarray(this_image=me) call prif_sync_all ! synchronize with myself an image-dependent number of times: do i=1, lim*me call prif_sync_images([me]) end do call prif_sync_all diag = .true. end function function check_neighbor() result(diag) type(test_diagnosis_t) :: diag integer(c_int) me, num_imgs integer i call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=num_imgs) call prif_sync_all ! test based on F23 11.7.4 note 3 do i=1, lim if (me > 1) call prif_sync_images([me-1]) if (me < num_imgs) call prif_sync_images([me+1]) end do call prif_sync_all diag = .true. end function function check_hot() result(diag) type(test_diagnosis_t) :: diag integer(c_int) :: me, num_imgs integer :: i call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=num_imgs) call prif_sync_all ! all images synchronize with 1 if (me == 1) then block integer(c_int) :: everyone(num_imgs) everyone = [(i, i=1,num_imgs)] do i=1, lim ! SYNC IMAGES (*) call prif_sync_images() end do do i=1, lim call prif_sync_images(everyone) end do end block else do i=1, lim*2 call prif_sync_images([1]) end do endif call prif_sync_all diag = .true. end function end module prif_sync_images_test_m fortran-caffeine-0.7.2/test/prif_co_min_test.F900000664000175000017500000001626015162221361021703 0ustar alastairalastairmodule prif_co_min_test_m use iso_c_binding, only: c_int8_t, c_int16_t, c_int32_t, c_int64_t, c_float, c_double use prif, only : prif_co_min, prif_co_min_character, prif_this_image_no_coarray, prif_num_images use julienne_m, only: & operator(.all.) & ,operator(.approximates.) & ,operator(.within.) & ,operator(.equalsExpected.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_co_min_test_t type, extends(test_t) :: prif_co_min_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_co_min" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_co_min_test_t) prif_co_min_test allocate(test_results, source = prif_co_min_test%run([ & test_description_t("computing element-wise minima for integer(c_int32_t) scalars", usher(check_32_bit_integer)) & ,test_description_t("computing element-wise minima for a 1D default integer array", usher(check_default_integer)) & ,test_description_t("computing element-wise minima for a 1D integer(c_int8t) array", usher(check_8_bit_integer)) & ,test_description_t("computing element-wise minima for a 1D integer(c_int16_t) array", usher(check_16_bit_integer)) & ,test_description_t("computing element-wise minima for a 1D integer(c_int64_t) array", usher(check_64_bit_integer)) & ,test_description_t("computing element-wise minima for a 2D real(c_float) array", usher(check_32_bit_real)) & ,test_description_t("computing element-wise minima for a 1D real(c_double) array", usher(check_64_bit_real)) & ,test_description_t("computing element-wise minima for a character scalar", usher(check_character)) & ])) end function function check_default_integer() result(diag) type(test_diagnosis_t) :: diag integer, parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer, dimension(size(values,1)) :: my_val, expected integer me, ni, i call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_8_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int8_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int8_t), [2, 4]) integer :: me, ni, i integer(c_int8_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_16_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int16_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int16_t), [2, 4]) integer :: me, ni, i integer(c_int16_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_32_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int32_t), parameter :: values(*) = [1, -19, 5, 13, 11, 7, 17, 3] integer :: me, ni, i integer(c_int32_t) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(mod(me-1, size(values))+1) call prif_co_min(my_val) expected = minval([(values(mod(i-1,size(values))+1), i = 1, ni)]) diag = my_val .equalsExpected. expected end function function check_64_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int64_t), parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer :: me, ni, i integer(c_int64_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_32_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_float), parameter :: values(*,*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2,2,2]) real(c_float), parameter :: tolerance = 0_c_double integer :: me, ni, i real(c_float), dimension(size(values,1), size(values,2)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, :, mod(me-1, size(values,3))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:,:,mod(i-1,size(values,3))+1), i = 1, ni)], [size(values,1), size(values,2), ni]), dim=3) diag = .all. (expected .approximates. my_val .within. tolerance) end function function check_64_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_double), parameter :: values(*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2, 4]) real(c_double), parameter :: tolerance = 0_c_double integer :: me, ni, i real(c_double), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_min(my_val) expected = minval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .approximates. expected .within. tolerance) end function function check_character() result(diag) type(test_diagnosis_t) :: diag character(len=*), parameter :: values(*) = & [ "To be ","or not " & , "to ","be. " & , "that ","is " & , "the ","question"] character(len=len(values)) my_val integer me, ni, i call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(mod(me-1, size(values))+1) call prif_co_min_character(my_val) ! issue #205: workaround flang optimizer bug with a temp associate(tmp => [(values(mod(i-1,size(values))+1), i = 1, ni)]) diag = .all. (my_val .equalsExpected. minval(tmp)) end associate end function end module prif_co_min_test_m fortran-caffeine-0.7.2/test/prif_co_sum_test.F900000664000175000017500000002151115162221361021717 0ustar alastairalastairmodule prif_co_sum_test_m use iso_c_binding, only: c_int8_t, c_int16_t, c_int32_t, c_int64_t, c_float, c_double use prif, only : prif_co_sum, prif_num_images, prif_this_image_no_coarray use julienne_m, only: & operator(.all.) & ,operator(.also.) & ,operator(.approximates.) & ,operator(.equalsExpected.) & ,operator(.within.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_co_sum_test_t type, extends(test_t) :: prif_co_sum_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_co_sum" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_co_sum_test_t) prif_co_sum_test allocate(test_results, source = prif_co_sum_test%run([ & test_description_t("computing the element-wise sum of a 1D default integer array", usher(check_default_integer)) & ,test_description_t("computing the element-wise sum of a 1D 8-bit integer(c_int8_t) array", usher(check_8_bit_integer)) & ,test_description_t("computing the element-wise sum of a 1D 16-bit integer(c_int16_t) array", usher(check_16_bit_integer)) & ,test_description_t("computing the element-wise sum of integer(c_int32_t) scalars", usher(check_32_bit_integer)) & ,test_description_t("computing the element-wise sum of a 1D 64-bit integer(c_int64_t) array", usher(check_64_bit_integer)) & ,test_description_t("computing the element-wise sum of a 2D 32-bit real(c_float) array", usher(check_32_bit_real)) & ,test_description_t("computing the element-wise sum of a 1D 64-bit real(c_double) array", usher(check_64_bit_real)) & ,test_description_t("computing the element-wise sum of a 2D complex(c_float) array", usher(check_32_bit_complex)) & ,test_description_t("computing the element-wise sum of a 1D complex(c_double) array", usher(check_64_bit_complex)) & ])) end function function check_default_integer() result(diag) type(test_diagnosis_t) :: diag integer, parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer :: me, ni, i integer, dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_8_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int8_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int8_t), [2, 4]) integer :: me, ni, i integer(c_int8_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_16_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int16_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int16_t), [2, 4]) integer :: me, ni, i integer(c_int16_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_32_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int32_t), parameter :: values(*) = [1, -19, 5, 13, 11, 7, 17, 3] integer :: me, ni, i integer(c_int32_t) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(mod(me-1, size(values))+1) call prif_co_sum(my_val) expected = sum([(values(mod(i-1,size(values))+1), i = 1, ni)]) diag = my_val .equalsExpected. expected end function function check_64_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int64_t), parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer :: me, ni, i integer(c_int64_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_32_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_float), parameter :: values(*,*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2,2,2]) real(c_float), parameter :: tolerance = 0_c_float integer :: me, ni, i real(c_float), dimension(size(values,1), size(values,2)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, :, mod(me-1, size(values,3))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:,:,mod(i-1,size(values,3))+1), i = 1, ni)], [size(values,1), size(values,2), ni]), dim=3) diag = .all. (my_val .approximates. expected .within. tolerance) end function function check_64_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_double), parameter :: values(*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2, 4]) real(c_double), parameter :: tolerance = 0_c_double integer :: me, ni, i real(c_double), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .approximates. expected .within. tolerance) end function function check_32_bit_complex() result(diag) type(test_diagnosis_t) :: diag complex(c_float), parameter :: values(*,*,*) = reshape( & [ cmplx(1., 53.), cmplx(3., 47.) & , cmplx(5., 43.), cmplx(7., 41.) & , cmplx(11., 37.), cmplx(13., 31.) & , cmplx(17., 29.), cmplx(19., 23.) & ], & [2,2,2]) real(c_float), parameter :: tolerance = 0_c_float integer :: me, ni, i complex(c_float), dimension(size(values,1),size(values,2)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, :, mod(me-1, size(values,3))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:,:,mod(i-1,size(values,3))+1), i = 1, ni)], [size(values,1), size(values,2), ni]), dim=3) diag = & .all. (real(my_val) .approximates. real(expected) .within. tolerance) & .also. (.all. (aimag(my_val) .approximates. aimag(expected) .within. tolerance)) end function function check_64_bit_complex() result(diag) type(test_diagnosis_t) :: diag complex(c_double), parameter :: values(*,*) = reshape( & [ cmplx(1., 53.), cmplx(3., 47.) & , cmplx(5., 43.), cmplx(7., 41.) & , cmplx(11., 37.), cmplx(13., 31.) & , cmplx(17., 29.), cmplx(19., 23.) & ], & [2,4]) real(c_double), parameter :: tolerance = 0_c_double integer me, ni, i complex(c_double), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_sum(my_val) expected = sum(reshape([(values(:,mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1), ni]), dim=2) ! TODO: simplify once Julienne issue #137 is implemented diag = & .all. (real(my_val, c_double) .approximates. real(expected, c_double) .within. tolerance) & .also. (.all. (real(aimag(my_val), c_double) .approximates. real(aimag(expected), c_double) .within. tolerance)) end function end module prif_co_sum_test_m fortran-caffeine-0.7.2/test/prif_strided_test.F900000664000175000017500000002506215162221361022075 0ustar alastairalastairmodule prif_strided_test_m # include "test-uses-alloc.F90" use prif, only: & prif_num_images, & prif_get, & prif_put_strided, & prif_put_strided_indirect, & prif_get_strided, & prif_get_strided_indirect, & prif_sync_all, & prif_this_image_no_coarray use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, usher & ,operator(.all.), operator(.equalsExpected.) implicit none private public :: prif_strided_test_t type, extends(test_t) :: prif_strided_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF Strided RMA" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_strided_test_t) prif_strided_test allocate(test_results, source = prif_strided_test%run([ & test_description_t("putting strided data to another image", usher(check_put)) & ,test_description_t("putting strided data with indirect interface", usher(check_put_indirect)) & ,test_description_t("getting strided data from another image", usher(check_get)) & ,test_description_t("getting strided data with indirect interface", usher(check_get_indirect)) & ])) end function function check_put() result(diag) type(test_diagnosis_t) :: diag integer :: me, num_imgs, neighbor type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, target :: mydata(1:4, 1:4) integer, target :: expected(1:4, 1:4) integer, pointer :: local_slice(:,:) integer(c_size_t) :: sizeof_int sizeof_int = storage_size(me)/8 call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = sizeof_int*product(shape(mydata)), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice, shape(mydata)) ! init data arrays to known values local_slice = -1 expected = -1 mydata = 0 call prif_sync_all ! simple example: we set, then copy the interior rectangle of a 4x4 array mydata(2:3, 2:3) = me expected(2:3, 2:3) = merge(me-1, num_imgs, me > 1) call prif_put_strided( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 5*sizeof_int, & remote_stride = [4*sizeof_int, sizeof_int], & current_image_buffer = c_loc(mydata(2,2)), & current_image_stride = [4*sizeof_int, sizeof_int], & element_size = sizeof_int, & extent = [2_c_size_t, 2_c_size_t]) call prif_sync_all diag = .all. (local_slice .equalsExpected. expected) call prif_deallocate_coarray(coarray_handle) end function function check_put_indirect() result(diag) type(test_diagnosis_t) :: diag type :: my_type type(c_ptr) :: my_component end type type(my_type), target :: dummy_element integer, pointer :: component_access(:,:) integer :: me, num_imgs, neighbor integer, target :: mydata(1:4, 1:4) integer, target :: expected(1:4, 1:4) type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory type(my_type), pointer :: local_slice integer(c_intptr_t) :: base_addr integer(c_size_t) :: sizeof_int sizeof_int = storage_size(me)/8 call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_allocate( & size_in_bytes = int(sizeof_int*product(shape(mydata)), c_size_t), & allocated_memory = local_slice%my_component) call c_f_pointer(local_slice%my_component, component_access, shape(mydata)) ! init data arrays to known values component_access = -1 expected = -1 mydata = 0 call prif_sync_all ! simple example: we set, then copy the interior rectangle of a 4x4 array mydata(2:3, 2:3) = me expected(2:3, 2:3) = merge(me-1, num_imgs, me > 1) call prif_get( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(dummy_element), & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) base_addr = transfer(dummy_element%my_component, base_addr) call prif_put_strided_indirect( & image_num = neighbor, & remote_ptr = base_addr + 5*sizeof_int, & remote_stride = [4*sizeof_int, sizeof_int], & current_image_buffer = c_loc(mydata(2,2)), & current_image_stride = [4*sizeof_int, sizeof_int], & element_size = sizeof_int, & extent = [2_c_size_t, 2_c_size_t]) call prif_sync_all diag = .all. (component_access .equalsExpected. expected) call prif_deallocate(local_slice%my_component) call prif_deallocate_coarray(coarray_handle) end function function check_get() result(diag) type(test_diagnosis_t) :: diag integer :: me, num_imgs, neighbor type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, target :: mydata(1:4, 1:4) integer, target :: expected(1:4, 1:4) integer, pointer :: local_slice(:,:) integer(c_size_t) :: sizeof_int sizeof_int = storage_size(me)/8 call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = sizeof_int*product(shape(mydata)), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice, shape(mydata)) ! simple example: we copy the interior rectangle of a 4x4 array local_slice = -1 local_slice(2:3, 2:3) = me expected = 0 expected(2:3, 2:3) = neighbor mydata = 0 call prif_sync_all call prif_get_strided( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 5*sizeof_int, & remote_stride = [4*sizeof_int, sizeof_int], & current_image_buffer = c_loc(mydata(2,2)), & current_image_stride = [4*sizeof_int, sizeof_int], & element_size = sizeof_int, & extent = [2_c_size_t, 2_c_size_t]) call prif_sync_all diag = .all. (mydata .equalsExpected. expected) call prif_deallocate_coarray(coarray_handle) end function function check_get_indirect() result(diag) type(test_diagnosis_t) :: diag type :: my_type type(c_ptr) :: my_component end type type(my_type), target :: dummy_element integer, pointer :: component_access(:,:) integer :: me, num_imgs, neighbor integer, target :: mydata(1:4, 1:4) integer, target :: expected(1:4, 1:4) type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory type(my_type), pointer :: local_slice integer(c_intptr_t) :: base_addr integer(c_size_t) :: sizeof_int sizeof_int = storage_size(me)/8 call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_allocate( & size_in_bytes = int(sizeof_int*product(shape(mydata)), c_size_t), & allocated_memory = local_slice%my_component) call c_f_pointer(local_slice%my_component, component_access, shape(mydata)) ! simple example: we copy the interior rectangle of a 4x4 array component_access = -1 component_access(2:3, 2:3) = me expected = 0 expected(2:3, 2:3) = neighbor mydata = 0 call prif_sync_all call prif_get( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(dummy_element), & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) base_addr = transfer(dummy_element%my_component, base_addr) call prif_get_strided_indirect( & image_num = neighbor, & remote_ptr = base_addr + 5*sizeof_int, & remote_stride = [4*sizeof_int, sizeof_int], & current_image_buffer = c_loc(mydata(2,2)), & current_image_stride = [4*sizeof_int, sizeof_int], & element_size = sizeof_int, & extent = [2_c_size_t, 2_c_size_t]) call prif_sync_all diag = .all. (mydata .equalsExpected. expected) call prif_deallocate(local_slice%my_component) call prif_deallocate_coarray(coarray_handle) end function end module fortran-caffeine-0.7.2/test/prif_rma_test.F900000664000175000017500000002063515162221361021217 0ustar alastairalastairmodule prif_rma_test_m # include "test-uses-alloc.F90" use prif, only: & prif_num_images, & prif_put, & prif_put_indirect, & prif_get, & prif_get_indirect, & prif_sync_all, & prif_sync_memory, & prif_this_image_no_coarray use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, usher, operator(.equalsExpected.) implicit none private public :: prif_rma_test_t type, extends(test_t) :: prif_rma_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF RMA" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_rma_test_t) prif_rma_test allocate(test_results, source = prif_rma_test%run([ & test_description_t("sending a value to another image", usher(check_put)) & ,test_description_t("sending a value with indirect interface", usher(check_put_indirect)) & ,test_description_t("getting a value from another image", usher(check_get)) & ,test_description_t("getting a value with indirect interface", usher(check_get_indirect)) & ])) end function function check_put() result(diag) type(test_diagnosis_t) :: diag integer :: dummy_element, num_imgs, expected, neighbor integer, target :: me type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, pointer :: local_slice call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) expected = merge(me-1, num_imgs, me > 1) call prif_put( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(me), & size_in_bytes = c_sizeof(me)) call prif_sync_all ! superfluous, just to ensure prif_sync_memory is usable call prif_sync_memory diag = local_slice .equalsExpected. expected call prif_deallocate_coarray(coarray_handle) end function function check_put_indirect() result(diag) type(test_diagnosis_t) :: diag type :: my_type type(c_ptr) :: my_component end type type(my_type), target :: dummy_element integer, pointer :: component_access integer :: dummy_component, num_imgs, expected, neighbor integer, target :: me type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory type(my_type), pointer :: local_slice integer(c_intptr_t) :: base_addr call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_allocate( & size_in_bytes = int(storage_size(dummy_component)/8, c_size_t), & allocated_memory = local_slice%my_component) call prif_sync_all call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) expected = merge(me-1, num_imgs, me > 1) call prif_get( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(dummy_element), & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) base_addr = transfer(dummy_element%my_component, base_addr) call prif_put_indirect( & image_num = neighbor, & remote_ptr = base_addr, & current_image_buffer = c_loc(me), & size_in_bytes = int(storage_size(me)/8, c_size_t)) call prif_sync_all call c_f_pointer(local_slice%my_component, component_access) diag = component_access .equalsExpected. expected call prif_deallocate(local_slice%my_component) call prif_deallocate_coarray(coarray_handle) end function function check_get() result(diag) type(test_diagnosis_t) :: diag integer :: dummy_element, num_imgs, me, neighbor, expected integer, target :: retrieved type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer, pointer :: local_slice call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) expected = neighbor local_slice = me call prif_sync_all call prif_get( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(retrieved), & size_in_bytes = c_sizeof(retrieved)) diag = retrieved .equalsExpected. expected call prif_deallocate_coarray(coarray_handle) end function function check_get_indirect() result(diag) type(test_diagnosis_t) :: diag type :: my_type type(c_ptr) :: my_component end type type(my_type), target :: dummy_element integer, pointer :: component_access integer :: dummy_component, num_imgs, me, expected, neighbor integer, target :: retrieved type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory type(my_type), pointer :: local_slice integer(c_intptr_t) :: base_addr call prif_allocate_coarray( & [integer(c_int64_t) :: 1], [integer(c_int64_t)::], & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t), & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call c_f_pointer(allocated_memory, local_slice) call prif_allocate( & size_in_bytes = int(storage_size(dummy_component)/8, c_size_t), & allocated_memory = local_slice%my_component) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) neighbor = merge(me+1, 1, me < num_imgs) expected = neighbor call c_f_pointer(local_slice%my_component, component_access) component_access = me call prif_sync_all call prif_get( & image_num = neighbor, & coarray_handle = coarray_handle, & offset = 0_c_size_t, & current_image_buffer = c_loc(dummy_element), & size_in_bytes = int(storage_size(dummy_element)/8, c_size_t)) base_addr = transfer(dummy_element%my_component, base_addr) call prif_get_indirect( & image_num = neighbor, & remote_ptr = base_addr, & current_image_buffer = c_loc(retrieved), & size_in_bytes = int(storage_size(retrieved)/8, c_size_t)) diag = retrieved .equalsExpected. expected call prif_deallocate(local_slice%my_component) call prif_deallocate_coarray(coarray_handle) end function end module fortran-caffeine-0.7.2/test/julienne-driver.F900000664000175000017500000001066015162221361021460 0ustar alastairalastair! Copyright (c) 2024-2025, The Regents of the University of California ! Terms of use are as specified in LICENSE.txt program test_suite_driver use julienne_m use prif, only: prif_this_image_no_coarray, prif_num_images, prif_sync_all, prif_co_sum, prif_error_stop use iso_c_binding, only: c_int, c_bool use prif_init_test_m, only : prif_init_test_t, check_caffeination use prif_num_images_test_m, only : prif_num_images_test_t use prif_this_image_no_coarray_test_m, only : prif_this_image_no_coarray_test_t use prif_image_queries_test_m, only : prif_image_queries_test_t use prif_types_test_m, only : prif_types_test_t use prif_co_broadcast_test_m, only : prif_co_broadcast_test_t use prif_co_sum_test_m, only : prif_co_sum_test_t use prif_co_max_test_m, only : prif_co_max_test_t use prif_co_min_test_m, only : prif_co_min_test_t use prif_co_reduce_test_m, only :prif_co_reduce_test_t use prif_sync_images_test_m, only : prif_sync_images_test_t use prif_image_index_test_m, only : prif_image_index_test_t use prif_allocate_test_m, only : prif_allocate_test_t use prif_coarray_inquiry_test_m, only : prif_coarray_inquiry_test_t use prif_teams_test_m, only : prif_teams_test_t use prif_rma_test_m, only : prif_rma_test_t use prif_strided_test_m, only : prif_strided_test_t use prif_event_test_m, only : prif_event_test_t use prif_image_queries_test_m, only : prif_image_queries_test_t use prif_atomic_test_m, only : prif_atomic_test_t use prif_error_stop_test_m, only : prif_error_stop_test_t use prif_stop_test_m, only : prif_stop_test_t implicit none type(test_diagnosis_t) :: dummy dummy = check_caffeination() ! ensure an early call to prif_init # if JULIENNE_PARALLEL_CALLBACKS julienne_this_image => julienne_callback_this_image julienne_num_images => julienne_callback_num_images julienne_sync_all => julienne_callback_sync_all julienne_co_sum_integer => julienne_callback_co_sum_integer julienne_error_stop => julienne_callback_error_stop # endif associate(test_harness => test_harness_t([ & ! tests for basic functionality that are mostly self-contained test_fixture_t( prif_init_test_t() ) & ,test_fixture_t( prif_num_images_test_t() ) & ,test_fixture_t( prif_this_image_no_coarray_test_t() ) & ,test_fixture_t( prif_image_queries_test_t() ) & ,test_fixture_t( prif_types_test_t() ) & ! collectives tests ,test_fixture_t( prif_co_broadcast_test_t() ) & ,test_fixture_t( prif_co_sum_test_t() ) & ,test_fixture_t( prif_co_max_test_t() ) & ,test_fixture_t( prif_co_min_test_t() ) & ,test_fixture_t( prif_co_reduce_test_t() ) & ! tests that rely primarily upon coarrays ,test_fixture_t( prif_allocate_test_t() ) & ! should be first coarray test ,test_fixture_t( prif_coarray_inquiry_test_t() ) & ,test_fixture_t( prif_image_index_test_t() ) & ,test_fixture_t( prif_rma_test_t() ) & ,test_fixture_t( prif_strided_test_t() ) & ! synchronization and data race tests ,test_fixture_t( prif_event_test_t() ) & ,test_fixture_t( prif_atomic_test_t() ) & ,test_fixture_t( prif_sync_images_test_t() ) & ! internally uses coarrays and events ! complicated multi-feature tests ,test_fixture_t( prif_teams_test_t() ) & ! exit tests ,test_fixture_t( prif_error_stop_test_t() ) & ,test_fixture_t( prif_stop_test_t() ) & ])) call test_harness%report_results end associate contains function julienne_callback_this_image() result(this_image_id) implicit none integer :: this_image_id integer(c_int) :: me call prif_this_image_no_coarray(this_image=me) this_image_id = int(me) end function function julienne_callback_num_images() result(image_count) implicit none integer :: image_count integer(c_int) :: ni call prif_num_images(ni) image_count = int(ni) end function subroutine julienne_callback_sync_all() implicit none call prif_sync_all() end subroutine subroutine julienne_callback_co_sum_integer(a, result_image) implicit none integer, intent(inout), target :: a(:) integer, intent(in), optional :: result_image call prif_co_sum(a, result_image) end subroutine subroutine julienne_callback_error_stop(stop_code_char) implicit none character(len=*), intent(in) :: stop_code_char call prif_error_stop(quiet=.false._c_bool, stop_code_char=stop_code_char) end subroutine end program test_suite_driver fortran-caffeine-0.7.2/test/prif_co_max_test.F900000664000175000017500000001666615162221361021717 0ustar alastairalastairmodule prif_co_max_test_m use iso_c_binding, only: c_int8_t, c_int16_t, c_int32_t, c_int64_t, c_float, c_double use prif, only : prif_co_max, prif_co_max_character, prif_this_image_no_coarray, prif_num_images use julienne_m, only: & operator(.all.) & ,operator(.approximates.) & ,operator(.within.) & ,operator(.equalsExpected.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_co_max_test_t type, extends(test_t) :: prif_co_max_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_co_max" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_co_max_test_t) prif_co_max_test allocate(test_results, source = prif_co_max_test%run([ & test_description_t("computing element-wise maxima for integer(c_int32_t) scalars", usher(check_32_bit_integer)) & ,test_description_t("computing element-wise maxima for a 1D default integer array", usher(check_default_integer)) & ,test_description_t("computing element-wise maxima for a 1D integer(c_int8_t) array", usher(check_8_bit_integer)) & ,test_description_t("computing element-wise maxima for a 1D integer(c_int16_t) array", usher(check_16_bit_integer)) & ,test_description_t("computing element-wise maxima for a 1D integer(c_int64_t) array", usher(check_64_bit_integer)) & ,test_description_t("computing element-wise maxima for a 2D real(c_float) array", usher(check_32_bit_real)) & ,test_description_t("computing element-wise maxima for a 1D real(c_double) array", usher(check_64_bit_real)) & ,test_description_t("computing element-wise maxima for character scalars", usher(check_character)) & ])) end function function check_default_integer() result(diag) type(test_diagnosis_t) :: diag integer, parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer :: me, ni, i integer, dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_8_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int8_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int8_t), [2, 4]) integer :: me, ni, i integer(c_int8_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_16_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int16_t), parameter :: values(*,*) = reshape(int([1, -19, 5, 13, 11, 7, 17, 3],c_int16_t), [2, 4]) integer :: me, ni, i integer(c_int16_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (int(my_val) .equalsExpected. int(expected)) end function function check_32_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int32_t), parameter :: values(*) = [1, -19, 5, 13, 11, 7, 17, 3] integer :: me, ni, i integer(c_int32_t) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(mod(me-1, size(values))+1) call prif_co_max(my_val) expected = maxval([(values(mod(i-1,size(values))+1), i = 1, ni)]) diag = my_val .equalsExpected. expected end function function check_64_bit_integer() result(diag) type(test_diagnosis_t) :: diag integer(c_int64_t), parameter :: values(*,*) = reshape([1, -19, 5, 13, 11, 7, 17, 3], [2, 4]) integer :: me, ni, i integer(c_int64_t), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .equalsExpected. expected) end function function check_32_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_float), parameter :: values(*,*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2,2,2]) real(c_float), parameter :: tolerance = 0_c_float integer :: me, ni, i real(c_float), dimension(size(values,1), size(values,2)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, :, mod(me-1, size(values,3))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:,:,mod(i-1,size(values,3))+1), i = 1, ni)], [size(values,1), size(values,2), ni]), dim=3) diag = .all. (my_val .approximates. expected .within. tolerance) end function function check_64_bit_real() result(diag) type(test_diagnosis_t) :: diag real(c_double), parameter :: values(*,*) = reshape([1, 19, 5, 13, 11, 7, 17, 3], [2, 4]) real(c_double), parameter :: tolerance = 0_c_double integer :: me, ni, i real(c_double), dimension(size(values,1)) :: my_val, expected call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_max(my_val) expected = maxval(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), dim=2) diag = .all. (my_val .approximates. expected .within. tolerance) end function function check_character() result(diag) type(test_diagnosis_t) :: diag character(len=*), parameter :: values(*) = & [ "To be ","or not " & , "to ","be. " & , "that ","is " & , "the ","question"] integer :: me, ni, i character(len=len(values)) :: my_val call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(mod(me-1, size(values))+1) call prif_co_max_character(my_val) ! issue #205: workaround flang optimizer bug with a temp associate(tmp => [(values(mod(i-1,size(values))+1), i = 1, ni)]) diag = my_val .equalsExpected. maxval(tmp) end associate end function end module prif_co_max_test_m fortran-caffeine-0.7.2/test/test-utils.F900000664000175000017500000000153715162221361020476 0ustar alastairalastair#ifndef CAF_INCLUDED_TEST_UTILS #define CAF_INCLUDED_TEST_UTILS #ifndef CPP_STRINGIFY_SOURCE # if defined(__GFORTRAN__) || defined(_CRAYFTN) || defined(NAGFOR) || defined(__LFORTRAN__) # define CPP_STRINGIFY_SOURCE(x) "x" # else # define CPP_STRINGIFY_SOURCE(x) #x # endif #endif #ifndef CPP_LINE_STRING # if defined(__GFORTRAN__) ! work-around Gfortran's defective preprocessor # define CPP_LINE_STRING string_t(__LINE__) # else # define CPP_LINE_STRING_HELPER(n) CPP_STRINGIFY_SOURCE(n) # define CPP_LINE_STRING CPP_LINE_STRING_HELPER(__LINE__) # endif #endif #define ALSO(exp) ALSO2(exp, "expression: (" // CPP_STRINGIFY_SOURCE(exp) // ")") #define ALSO2(exp,desc) diag = diag .also. \ ( test_diagnosis_t(exp, NEW_LINE('')) // \ __FILE__ // ":" // CPP_LINE_STRING // ": FAILED: " // desc ) #endif fortran-caffeine-0.7.2/test/prif_num_images_test.F900000664000175000017500000000237115162221361022561 0ustar alastairalastairmodule prif_num_images_test_m use prif, only : prif_num_images use julienne_m, only: & operator(//) & ,operator(.isAtLeast.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_num_images_test_t type, extends(test_t) :: prif_num_images_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_num_images" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_num_images_test_t) prif_num_images_test allocate(test_results, source = prif_num_images_test%run([ & test_description_t("returning a valid number of images when invoked with no arguments", usher(check_num_images_valid)) & ])) end function function check_num_images_valid() result(diag) type(test_diagnosis_t) :: diag integer num_imgs call prif_num_images(num_images=num_imgs) diag = (num_imgs .isAtLeast. 1) // "positive number of images" end function end module prif_num_images_test_m fortran-caffeine-0.7.2/test/prif_types_test.F900000664000175000017500000001101715162221361021576 0ustar alastairalastair#include "test-utils.F90" module prif_types_test_m use iso_fortran_env, only: int8 use prif, only: prif_team_type, prif_event_type, prif_notify_type, prif_lock_type, prif_critical_type use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.all.), operator(.also.), operator(.equalsExpected.), operator(.greaterThan.), operator(.isAtMost.), operator(//) implicit none private public :: prif_types_test_t type, extends(test_t) :: prif_types_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type type, private :: dummy_t private integer foo type(dummy_t), pointer :: p end type type :: pointer_wrapper_t private type(dummy_t), pointer :: info => null() end type contains pure function subject() character(len=:), allocatable :: subject subject = "PRIF Types" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_types_test_t) prif_types_test allocate(test_results, source = prif_types_test%run([ & test_description_t("having a compliant prif_team_type representation", usher(check_team_type)) & , test_description_t("having a compliant prif_event_type representation", usher(check_event_type)) & , test_description_t("having a compliant prif_lock_type representation", usher(check_lock_type)) & , test_description_t("having a compliant prif_notify_type representation", usher(check_notify_type)) & , test_description_t("having a compliant prif_critical_type representation", usher(check_critical_type)) & ])) end function function check_team_type() result(diag) type(test_diagnosis_t) :: diag type(prif_team_type) :: team type(pointer_wrapper_t) :: pointer_wrap type(dummy_t), target :: tgt diag = .true. ! size check ALSO(storage_size(team) .equalsExpected. storage_size(pointer_wrap)) ! default initialization check pointer_wrap%info => tgt pointer_wrap = transfer(team, pointer_wrap) ALSO2(.not. associated(pointer_wrap%info), "default initialization to null") end function function check_event_type() result(diag) type(test_diagnosis_t) :: diag type(prif_event_type) :: event integer :: ssz integer(int8), allocatable :: bytes(:) diag = .true. ! size check ssz = storage_size(event) ALSO(ssz .greaterThan. 0) ALSO(ssz .isAtMost. 64*8) ! default initialization check allocate(bytes(64)) bytes = transfer(event, bytes) ALSO2(.all.(int(bytes) .equalsExpected. 0), "default initialization to zero") end function function check_lock_type() result(diag) type(test_diagnosis_t) :: diag type(prif_lock_type) :: lock integer :: ssz integer(int8), allocatable :: bytes(:) diag = .true. ! size check ssz = storage_size(lock) ALSO(ssz .greaterThan. 0) ALSO(ssz .isAtMost. 64*8) ! default initialization check allocate(bytes(64)) bytes = transfer(lock, bytes) ALSO2(.all.(int(bytes) .equalsExpected. 0), "default initialization to zero") end function function check_notify_type() result(diag) type(test_diagnosis_t) :: diag type(prif_notify_type) :: notify integer :: ssz integer(int8), allocatable :: bytes(:) diag = .true. ! size check ssz = storage_size(notify) ALSO(ssz .greaterThan. 0) ALSO(ssz .isAtMost. 64*8) ! default initialization check allocate(bytes(64)) bytes = transfer(notify, bytes) ALSO2(.all.(int(bytes) .equalsExpected. 0), "default initialization to zero") end function function check_critical_type() result(diag) type(test_diagnosis_t) :: diag type(prif_critical_type) :: critical integer :: ssz integer(int8), allocatable :: bytes(:) diag = .true. ! size check ssz = storage_size(critical) ALSO(ssz .greaterThan. 0) ALSO(ssz .isAtMost. 64*8) ! default initialization check allocate(bytes(64)) bytes = transfer(critical, bytes) ALSO2(.all.(int(bytes) .equalsExpected. 0), "default initialization to zero") end function end module prif_types_test_m fortran-caffeine-0.7.2/test/prif_atomic_test.F900000664000175000017500000005125015162221361021711 0ustar alastairalastair#include "julienne-assert-macros.h" #include "test-utils.F90" module prif_atomic_test_m # include "test-uses-alloc.F90" use julienne_m, only: call_julienne_assert_, test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.also.), operator(.equalsExpected.), operator(.isAtLeast.), operator(.isAtMost.), operator(.lessThan.), operator(//) use prif implicit none private public :: prif_atomic_test_t type, extends(test_t) :: prif_atomic_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type ! define an .equalsExpected. for logical(PRIF_ATOMIC_LOGICAL_KIND) interface operator(.equalsExpected.) module procedure prif_logical_equals end interface operator(.equalsExpected.) contains pure function prif_logical_equals(lhs, rhs) result(diag) logical(PRIF_ATOMIC_LOGICAL_KIND), intent(in) :: lhs, rhs type(test_diagnosis_t) :: diag diag = test_diagnosis_t(logical(lhs .eqv. rhs), & string_t("expected ") // merge('T','F',rhs) // "; actual value is " // merge('T','F',lhs)) end function prif_logical_equals pure function subject() character(len=:), allocatable :: subject subject = "PRIF Atomics" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_atomic_test_t) prif_atomic_test allocate(test_results, source = prif_atomic_test%run([ & test_description_t("an uncontended atomic test", usher(check_atomic_uncontended)) & , test_description_t("a contended hot-spot atomic test", usher(check_atomic_contended)) & ])) end function subroutine test_srand(seed) integer, intent(in) :: seed integer :: size call random_seed(size=size) block integer :: vals(size) vals = seed call random_seed(put=vals) end block end subroutine function test_rand(lo, hi) result(result_) integer :: lo, hi, result_ real :: r call random_number(r) ! Generate a uniform random number in [0, 1) result_ = int(r * (hi - lo + 1)) + lo call_julienne_assert((result_ .isAtLeast. lo) .also. (result_ .isAtMost. hi)) end function function check_atomic_uncontended() result(diag) type(test_diagnosis_t) :: diag integer, parameter :: lim = 100 integer :: me, num_imgs, peer, i integer(PRIF_ATOMIC_INT_KIND) :: dummy_atomic_int logical(PRIF_ATOMIC_LOGICAL_KIND) :: dummy_atomic_logical integer(c_size_t) :: sizeof_atomic_int, sizeof_atomic_logical type(prif_coarray_handle) :: coarray_handle_int, coarray_handle_logical type(c_ptr) :: c_ptr_int, c_ptr_logical integer(c_intptr_t) :: base_addr_int, base_addr_logical diag = .true. sizeof_atomic_int = int(storage_size(dummy_atomic_int)/8, c_size_t) sizeof_atomic_logical = int(storage_size(dummy_atomic_logical)/8, c_size_t) ! Check an invariant of the current Caffeine impl, not required by PRIF: call_julienne_assert(sizeof_atomic_int .equalsExpected. 8_c_size_t) call_julienne_assert(sizeof_atomic_logical .equalsExpected. 8_c_size_t) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) call test_srand(me) ! integer(PRIF_ATOMIC_INT_KIND) :: atomic_int[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = sizeof_atomic_int, & final_func = c_null_funptr, & coarray_handle = coarray_handle_int, & allocated_memory = c_ptr_int) base_addr_int = transfer(c_ptr_int, base_addr_int) ! logical(PRIF_ATOMIC_LOGICAL_KIND) :: atomic_logical[*] call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = sizeof_atomic_logical, & final_func = c_null_funptr, & coarray_handle = coarray_handle_logical, & allocated_memory = c_ptr_logical) base_addr_logical = transfer(c_ptr_logical, base_addr_logical) block integer(PRIF_ATOMIC_INT_KIND) :: value_int, expect_int logical(PRIF_ATOMIC_LOGICAL_KIND) :: value_logical, expect_logical ! local init and access operations do i=1, lim call prif_sync_all() ! only here for subtest isolation ! integer tests expect_int = me call prif_atomic_define_int(me, coarray_handle_int, 0_c_size_t, value=expect_int) call prif_atomic_ref_int(me, coarray_handle_int, 0_c_size_t, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "local define direct / ref direct") call prif_atomic_ref_int_indirect(me, base_addr_int, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "local define direct / ref indirect") expect_int = me * 100 call prif_atomic_define_int_indirect(me, base_addr_int, value=expect_int) call prif_atomic_ref_int_indirect(me, base_addr_int, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "local define indirect / ref indirect") call prif_atomic_cas_int(me, coarray_handle_int, 0_c_size_t, & old=value_int, compare=expect_int, new=(expect_int*10)) ALSO2(value_int .equalsExpected. expect_int, "local cas direct") expect_int = expect_int * 10 call prif_atomic_cas_int_indirect(me, base_addr_int, & old=value_int, compare=expect_int, new=(expect_int*10)) ALSO2(value_int .equalsExpected. expect_int, "local cas indirect") expect_int = expect_int * 10 call prif_atomic_ref_int(me, coarray_handle_int, 0_c_size_t, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "local cas / ref direct") expect_int = 0 call prif_atomic_define_int(me, coarray_handle_int, 0_c_size_t, value=expect_int) call prif_atomic_ref_int(me, coarray_handle_int, 0_c_size_t, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "local define direct / ref direct (final)") call prif_sync_all() ! only here for subtest isolation ! logical tests expect_logical = (IOR(me,1) == 1) call prif_atomic_define_logical(me, coarray_handle_logical, 0_c_size_t, value=expect_logical) call prif_atomic_ref_logical(me, coarray_handle_logical, 0_c_size_t, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "local define direct / ref direct)") call prif_atomic_ref_logical_indirect(me, base_addr_logical, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "local define direct / ref indirect") expect_logical = .not. expect_logical call prif_atomic_define_logical_indirect(me, base_addr_logical, value=expect_logical) call prif_atomic_ref_logical_indirect(me, base_addr_logical, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "local define indirect / ref indirect") call prif_atomic_cas_logical(me, coarray_handle_logical, 0_c_size_t, & old=value_logical, compare=expect_logical, new=(.not. expect_logical)) ALSO2(value_logical .equalsExpected. expect_logical, "local cas direct") expect_logical = .not. expect_logical call prif_atomic_cas_logical_indirect(me, base_addr_logical, & old=value_logical, compare=expect_logical, new=(.not. expect_logical)) ALSO2(value_logical .equalsExpected. expect_logical, "local cas indirect") expect_logical = .not. expect_logical call prif_atomic_ref_logical(me, coarray_handle_logical, 0_c_size_t, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "local cas / ref direct") expect_logical = .false. call prif_atomic_define_logical(me, coarray_handle_logical, 0_c_size_t, value=expect_logical) call prif_atomic_ref_logical(me, coarray_handle_logical, 0_c_size_t, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "local define direct / ref direct (final)") end do call prif_sync_all() ! uncontended test targeting peer's location call_julienne_assert(expect_int .equalsExpected. 0_c_size_t) call_julienne_assert(logical(expect_logical .eqv. .false.)) peer = mod(me,num_imgs)+1 ! logical test do i=1, lim block character(:), allocatable :: test_desc logical(PRIF_ATOMIC_LOGICAL_KIND) :: tmp tmp = (IAND(i,1) == 1) select case (test_rand(1,3)) case (1) ; test_desc = "define" expect_logical = tmp call prif_atomic_define_logical(peer, coarray_handle_logical, 0_c_size_t, value=expect_logical) case (2) ; test_desc = "cas succeed" call prif_atomic_cas_logical(peer, coarray_handle_logical, 0_c_size_t, & old=value_logical, compare=expect_logical, new=tmp) ALSO2(value_logical .equalsExpected. expect_logical, "int cas direct succeed") expect_logical = tmp case (3) ; test_desc = "cas fail" call prif_atomic_cas_logical(peer, coarray_handle_logical, 0_c_size_t, & old=value_logical, compare=(.not. expect_logical), new=tmp) ALSO2(value_logical .equalsExpected. expect_logical, "int cas direct fail") case default ; test_desc = "internal error"; call_julienne_assert(.false.) end select call prif_atomic_ref_logical(peer, coarray_handle_logical, 0_c_size_t, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, "result check for peer int "//test_desc) end block end do call prif_sync_all() ! only here for subtest isolation ! integer test do i=1, lim block character(:), allocatable :: test_desc integer(PRIF_ATOMIC_INT_KIND) :: tmp tmp = i select case (test_rand(1,11)) case (1) ; test_desc = "define" expect_int = i call prif_atomic_define_int(peer, coarray_handle_int, 0_c_size_t, value=expect_int) case (2) ; test_desc = "cas succeed" call prif_atomic_cas_int(peer, coarray_handle_int, 0_c_size_t, & old=value_int, compare=expect_int, new=tmp) ALSO2(value_int .equalsExpected. expect_int, "int cas direct succeed") expect_int = tmp case (3) ; test_desc = "cas fail" call prif_atomic_cas_int(peer, coarray_handle_int, 0_c_size_t, & old=value_int, compare=expect_int+1, new=tmp) ALSO2(value_int .equalsExpected. expect_int, "int cas direct fail") case (4) ; test_desc = "add" call prif_atomic_add(peer, coarray_handle_int, 0_c_size_t, value=tmp) expect_int = expect_int + tmp case (5) ; test_desc = "fetch_add" call prif_atomic_fetch_add(peer, coarray_handle_int, 0_c_size_t, value=tmp, old=value_int) ALSO2(value_int .equalsExpected. expect_int, "fetch_add fetch check") expect_int = expect_int + tmp case (6) ; test_desc = "and" call prif_atomic_and(peer, coarray_handle_int, 0_c_size_t, value=tmp) expect_int = IAND(expect_int, tmp) case (7) ; test_desc = "fetch_and" call prif_atomic_fetch_and(peer, coarray_handle_int, 0_c_size_t, value=tmp, old=value_int) ALSO2(value_int .equalsExpected. expect_int, "fetch_and fetch check") expect_int = IAND(expect_int, tmp) case (8) ; test_desc = "or" call prif_atomic_or(peer, coarray_handle_int, 0_c_size_t, value=tmp) expect_int = IOR(expect_int, tmp) case (9) ; test_desc = "fetch_or" call prif_atomic_fetch_or(peer, coarray_handle_int, 0_c_size_t, value=tmp, old=value_int) ALSO2(value_int .equalsExpected. expect_int, "fetch_or fetch check") expect_int = IOR(expect_int, tmp) case (10) ; test_desc = "xor" call prif_atomic_xor(peer, coarray_handle_int, 0_c_size_t, value=tmp) expect_int = IEOR(expect_int, tmp) case (11) ; test_desc = "fetch_xor" call prif_atomic_fetch_xor(peer, coarray_handle_int, 0_c_size_t, value=tmp, old=value_int) ALSO2(value_int .equalsExpected. expect_int, "fetch_xor fetch check") expect_int = IEOR(expect_int, tmp) case default ; test_desc = "internal error"; call_julienne_assert(.false.) end select call prif_atomic_ref_int(peer, coarray_handle_int, 0_c_size_t, value=value_int) ALSO2(value_int .equalsExpected. expect_int, "result check for peer int "//test_desc) end block end do end block call prif_deallocate_coarrays(([coarray_handle_int,coarray_handle_logical])) end function function check_atomic_contended() result(diag) type(test_diagnosis_t) diag integer, parameter :: lim = 100 integer :: me, num_imgs, root, i integer(PRIF_ATOMIC_INT_KIND) :: dummy_atomic_int logical(PRIF_ATOMIC_LOGICAL_KIND) :: dummy_atomic_logical integer(c_size_t) :: sizeof_atomic_int, sizeof_atomic_logical type(c_ptr) :: c_ptr_int, c_ptr_logical integer(c_intptr_t) :: base_addr_int, base_addr_logical integer(PRIF_ATOMIC_INT_KIND), parameter :: zero = 0, plus_one = 1, minus_one = -1 integer(PRIF_ATOMIC_INT_KIND) :: value_int, expect_int, tmp_int, my_bit logical(PRIF_ATOMIC_LOGICAL_KIND) :: false = .false. logical(PRIF_ATOMIC_LOGICAL_KIND) :: value_logical, expect_logical, tmp_logical character(len=:),allocatable :: desc diag = .true. sizeof_atomic_int = int(storage_size(dummy_atomic_int)/8, c_size_t) sizeof_atomic_logical = int(storage_size(dummy_atomic_logical)/8, c_size_t) call prif_num_images(num_images=num_imgs) call prif_this_image_no_coarray(this_image=me) call test_srand(me) root = 1 ! allocate centralized atomic variables on the root image if (me == root) then call prif_allocate(sizeof_atomic_int, c_ptr_int) base_addr_int = transfer(c_ptr_int, base_addr_int) call prif_atomic_define_int_indirect(me, base_addr_int, value=zero) call prif_allocate(sizeof_atomic_logical, c_ptr_logical) base_addr_logical = transfer(c_ptr_logical, base_addr_logical) call prif_atomic_define_logical_indirect(me, base_addr_logical, value=false) endif call prif_co_broadcast(base_addr_int, root) call prif_co_broadcast(base_addr_logical, root) desc = "integer add-up test " expect_int = zero do i=1, lim call prif_sync_all() call prif_atomic_add_indirect(root, base_addr_int, value=plus_one) call prif_atomic_add_indirect(root, base_addr_int, value=minus_one) call prif_atomic_fetch_add_indirect(root, base_addr_int, value=plus_one, old=value_int) ALSO2(value_int .isAtLeast. expect_int, desc//"mid-increment lower bound") ALSO2(value_int .lessThan. (expect_int + num_imgs), desc//"mid-increment upper bound") call prif_sync_all() expect_int = expect_int + num_imgs call prif_atomic_ref_int_indirect(root, base_addr_int, value=value_int) ALSO2(value_int .equalsExpected. expect_int, desc//"loop-bottom check") end do desc = "integer cas-up test " do i=1, MAX(2,lim/num_imgs**2) ! running time grows superlinearly with scale due to contention call prif_sync_all() tmp_int = expect_int do call prif_atomic_cas_int_indirect(root, base_addr_int, & old=value_int, compare=tmp_int, new=(tmp_int+1)) ALSO2(value_int .isAtLeast. expect_int, desc//"mid-increment lower bound") ALSO2(value_int .lessThan. (expect_int + num_imgs), desc//"mid-increment upper bound") if (value_int == tmp_int) exit ! success tmp_int = value_int ! collision => retry end do call prif_sync_all() expect_int = expect_int + num_imgs call prif_atomic_ref_int_indirect(root, base_addr_int, value=value_int) ALSO2(value_int .equalsExpected. expect_int, desc//"loop-bottom check") end do desc = "logical cas-toggle test " expect_logical = false do i=1, MAX(2,lim/num_imgs**2) ! running time grows superlinearly with scale due to contention call prif_sync_all() tmp_logical = expect_logical do call prif_atomic_cas_logical_indirect(root, base_addr_logical, & old=value_logical, compare=tmp_logical, new=(.not. tmp_logical)) if (value_logical .eqv. tmp_logical) exit ! success ALSO2(logical(value_logical .eqv. .not. tmp_logical), desc//"mid-swap sanity check") tmp_logical = value_logical ! collision => retry end do call prif_sync_all() expect_logical = merge(expect_logical, .not. expect_logical, mod(num_imgs,2) == 0) call prif_atomic_ref_logical_indirect(root, base_addr_logical, value=value_logical) ALSO2(value_logical .equalsExpected. expect_logical, desc//"loop-bottom check") end do call prif_sync_all() if (me == root) then call prif_atomic_define_int_indirect(me, base_addr_int, value=zero) endif call prif_sync_all() desc = "randomized integer bitwise test " if (me <= sizeof_atomic_int*8) then my_bit = SHIFTL(plus_one, me-1) else my_bit = 0 endif expect_int = 0 do i=1, lim block character(:), allocatable :: test_desc select case (test_rand(1,6)) case (1) ; test_desc = "and" call prif_atomic_and_indirect(root, base_addr_int, value=NOT(my_bit)) expect_int = IAND(expect_int, NOT(my_bit)) case (2) ; test_desc = "fetch_and" call prif_atomic_fetch_and_indirect(root, base_addr_int, value=NOT(my_bit), old=value_int) ALSO2(IAND(value_int,my_bit) .equalsExpected. expect_int, desc//"fetch_and fetch check") expect_int = IAND(expect_int, NOT(my_bit)) case (3) ; test_desc = "or" call prif_atomic_or_indirect(root, base_addr_int, value=my_bit) expect_int = IOR(expect_int, my_bit) case (4) ; test_desc = "fetch_or" call prif_atomic_fetch_or_indirect(root, base_addr_int, value=my_bit, old=value_int) ALSO2(IAND(value_int,my_bit) .equalsExpected. expect_int, desc//"fetch_or fetch check") expect_int = IOR(expect_int, my_bit) case (5) ; test_desc = "xor" call prif_atomic_xor_indirect(root, base_addr_int, value=my_bit) expect_int = IEOR(expect_int, my_bit) case (6) ; test_desc = "fetch_xor" call prif_atomic_fetch_xor_indirect(root, base_addr_int, value=my_bit, old=value_int) ALSO2(IAND(value_int,my_bit) .equalsExpected. expect_int, desc//"fetch_xor fetch check") expect_int = IEOR(expect_int, my_bit) case default ; test_desc = "internal error"; call_julienne_assert(.false.) end select call prif_atomic_ref_int_indirect(root, base_addr_int, value=value_int) ALSO2(IAND(value_int,my_bit) .equalsExpected. expect_int, desc//"result check for int "//test_desc) end block end do call prif_sync_all() if (me == root) then call prif_deallocate(c_ptr_int) call prif_deallocate(c_ptr_logical) endif end function end module fortran-caffeine-0.7.2/test/prif_co_reduce_test.F900000664000175000017500000002004415162221361022362 0ustar alastairalastair#include "test-utils.F90" #include "julienne-assert-macros.h" module prif_co_reduce_test_m use iso_c_binding, only: c_ptr, c_funptr, c_size_t, c_f_pointer, c_f_procpointer, c_funloc, c_loc, c_null_ptr, c_associated use prif, only : prif_co_reduce, prif_num_images, prif_this_image_no_coarray, prif_operation_wrapper_interface use julienne_m, only : & call_julienne_assert_ & ,operator(.all.) & ,operator(.also.) & ,operator(.approximates.) & ,operator(.equalsExpected.) & ,operator(.within.) & ,operator(//) & ,usher & ,string_t & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_co_reduce_test_t type, extends(test_t) :: prif_co_reduce_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type type :: pair integer :: fst real :: snd end type #if HAVE_PARAM_DERIVED type :: array(length) integer, len :: length = 2 integer :: elements(length) end type type :: reduction_context_data type(c_funptr) :: user_op integer :: length end type #endif integer, target :: dummy contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_co_reduce" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_co_reduce_test_t) prif_co_reduce_test allocate(test_results, source = prif_co_reduce_test%run([ & test_description_t("performing a logical .and. reduction", usher(check_logical)) & ,test_description_t("performing a derived type reduction", usher(check_derived_type_reduction)) & #if HAVE_PARAM_DERIVED ,test_description_t("performing a parameterized derived type reduction", usher(check_type_parameter_reduction)) & #endif ])) end function function check_logical() result(diag) type(test_diagnosis_t) :: diag logical :: val integer :: me procedure(prif_operation_wrapper_interface), pointer :: op diag = .true. op => and_wrapper val = .true. call prif_co_reduce(val, op, c_null_ptr) ALSO(val) call prif_this_image_no_coarray(this_image=me) if (me == 1) then val = .false. end if call prif_co_reduce(val, op, c_null_ptr) ALSO(.not. val) end function subroutine and_wrapper(arg1, arg2_and_out, count, cdata) bind(C) type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata logical, pointer :: lhs(:), rhs_and_result(:) integer(c_size_t) :: i if (count == 0) return ! this expression is buggy as of Julienne 3.6.0 (julienne#166) !call_julienne_assert(cdata .equalsExpected. c_null_ptr) call_julienne_assert(.not. c_associated(cdata)) call c_f_pointer(arg1, lhs, [count]) call c_f_pointer(arg2_and_out, rhs_and_result, [count]) do i = 1, count rhs_and_result(i) = lhs(i).and.rhs_and_result(i) end do end subroutine function check_derived_type_reduction() result(diag) type(test_diagnosis_t) :: diag type(pair), parameter :: values(*,*) = reshape( & [ pair(1, 53.), pair(3, 47.) & , pair(5, 43.), pair(7, 41.) & , pair(11, 37.), pair(13, 31.) & , pair(17, 29.), pair(19, 23.) & ], & [2, 4]) integer :: me, ni, i type(pair), dimension(size(values,1)) :: my_val, expected type(pair), dimension(:,:), allocatable :: tmp procedure(prif_operation_wrapper_interface), pointer :: op real, parameter :: tolerance = 0D0 op => pair_adder call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_reduce(my_val, op, c_loc(dummy)) allocate(tmp(size(values,1),ni)) tmp = reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]) #if defined(__GFORTRAN__) ! gfortran 14 lacks the F18 intrinsic REDUCE() block integer :: j do i = 1, size(tmp,1) expected(i) = tmp(i,1) do j = 2, size(tmp,2) expected(i) = add_pair(expected(i), tmp(i,j)) end do end do end block #else expected = reduce(tmp, add_pair, dim=2) #endif diag = .all. (my_val%fst .equalsExpected. expected%fst) & .also. (.all. ( my_val%snd .approximates. expected%snd .within. tolerance)) end function pure function add_pair(lhs, rhs) result(total) type(pair), intent(in) :: lhs, rhs type(pair) :: total total%fst = lhs%fst + rhs%fst total%snd = lhs%snd + rhs%snd end function subroutine pair_adder(arg1, arg2_and_out, count, cdata) bind(C) type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata type(pair), pointer :: lhs(:), rhs_and_result(:) integer(c_size_t) :: i if (count == 0) return call_julienne_assert(cdata .equalsExpected. c_loc(dummy)) call c_f_pointer(arg1, lhs, [count]) call c_f_pointer(arg2_and_out, rhs_and_result, [count]) do i = 1, count rhs_and_result(i) = add_pair(lhs(i), rhs_and_result(i)) end do end subroutine #if HAVE_PARAM_DERIVED ! As of LLVM20, flang does not implement the types used by this test: ! flang/lib/Lower/ConvertType.cpp:482: not yet implemented: parameterized derived types ! error: Actual argument associated with TYPE(*) dummy argument 'a=' may not have a parameterized derived type ! Gfortran 14.2 also lacks the type support for this test: ! Error: Derived type 'pdtarray' at (1) is being used before it is defined function check_type_parameter_reduction() result(diag) type(test_diagnosis_t) :: diag type(array), parameter :: values(*,*) = reshape( & [ array(elements=[1, 53]), array(elements=[3, 47]) & , array(elements=[5, 43]), array(elements=[7, 41]) & , array(elements=[11, 37]), array(elements=[13, 31]) & , array(elements=[17, 29]), array(elements=[19, 23]) & ], & [2, 4]) integer :: me, ni, i type(array(values%length)), dimension(size(values,1)) :: my_val, expected procedure(prif_operation_wrapper_interface), pointer :: op type(reduction_context_data), target :: context op => array_wrapper context%user_op = c_funloc(add_array) context%length = values%length call prif_this_image_no_coarray(this_image=me) call prif_num_images(ni) my_val = values(:, mod(me-1, size(values,2))+1) call prif_co_reduce(my_val, op, c_loc(context)) expected = reduce(reshape([(values(:, mod(i-1,size(values,2))+1), i = 1, ni)], [size(values,1),ni]), add_array, dim=2) diag = .all. (my_val%elements .equalsExpected. expected%elements) end function pure function add_array(lhs, rhs) result(total) type(array), intent(in) :: lhs, rhs type(array) :: total total%elements = lhs%elements + rhs%elements end function subroutine array_wrapper(arg1, arg2_and_out, count, cdata) bind(C) type(c_ptr), intent(in), value :: arg1, arg2_and_out integer(c_size_t), intent(in), value :: count type(c_ptr), intent(in), value :: cdata type(reduction_context_data), pointer :: context if (count == 0) return call c_f_pointer(cdata, context) block abstract interface pure function op_interface(lhs, rhs) result(res) import :: array, context implicit none type(array(context%length)), intent(in) :: lhs, rhs type(array(context%length)) :: res end function end interface procedure(op_interface), pointer :: op type(array(context%length)), pointer :: lhs(:), rhs_and_result(:) integer(c_size_t) :: i call c_f_procpointer(context%user_op, op) call c_f_pointer(arg1, lhs, [count]) call c_f_pointer(arg2_and_out, rhs_and_result, [count]) do i = 1, count rhs_and_result(i) = op(lhs(i), rhs_and_result(i)) end do end block end subroutine #endif /* HAVE_PARAM_DERIVED */ end module prif_co_reduce_test_m fortran-caffeine-0.7.2/test/prif_image_queries_test.F900000664000175000017500000000556115162221361023260 0ustar alastairalastairmodule prif_image_queries_test_m use iso_c_binding, only: c_int use prif, only : prif_image_status, prif_stopped_images, prif_failed_images, PRIF_STAT_FAILED_IMAGE, PRIF_STAT_STOPPED_IMAGE use prif, only : prif_num_images use julienne_m, only: & operator(//) & ,operator(.all.) & ,operator(.also.) & ,operator(.isAtLeast.) & ,operator(.isAtMost.) & ,operator(.lessThan.) & ,operator(.expect.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_image_queries_test_t type, extends(test_t) :: prif_image_queries_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "PRIF Image Queries" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_image_queries_test_t) prif_image_queries_test allocate(test_results, source = prif_image_queries_test%run([ & test_description_t("providing valid prif_image_status()", usher(check_image_status)) & ,test_description_t("providing valid prif_stopped_images()", usher(check_stopped_images)) & ,test_description_t("providing valid prif_failed_images()", usher(check_failed_images)) & ])) end function function check_image_status() result(diag) type(test_diagnosis_t) :: diag integer(c_int) :: image_status call prif_image_status(1, image_status=image_status) ! TODO: replace with .any. once Julienne issue #138 is implemented diag = .expect. (any(image_status == [0, PRIF_STAT_FAILED_IMAGE, PRIF_STAT_STOPPED_IMAGE])) & // "permitted image status" end function function valid_image_list(nums) result(diag) integer, allocatable, intent(in) :: nums(:) type(test_diagnosis_t) :: diag integer ni call prif_num_images(num_images=ni) diag = & allocated(nums) .also. & (size(nums) .isAtMost. ni) .also. & (.all. (nums .isAtLeast. 1)) .also. & (.all. (nums .isAtMost. ni)) .also. & (.all. (nums(1:size(nums)-1) .lessThan. nums(2:size(nums)))) // "valid stopped image" end function function check_stopped_images() result(diag) type(test_diagnosis_t) :: diag integer, allocatable :: nums(:) call prif_stopped_images(stopped_images=nums) diag = valid_image_list(nums) end function function check_failed_images() result(diag) type(test_diagnosis_t) :: diag integer, allocatable :: nums(:) call prif_failed_images(failed_images=nums) diag = valid_image_list(nums) end function end module prif_image_queries_test_m fortran-caffeine-0.7.2/test/test-uses-alloc.F900000664000175000017500000000246415162221361021405 0ustar alastairalastair! This header provides use declarations needed for tests using prif_(de)allocate(_coarray), ! and hides the interface differences between PRIF spec revisions. ! It must be #included within a use block. #ifndef CAF_INCLUDED_TEST_USES_ALLOC #define CAF_INCLUDED_TEST_USES_ALLOC #include "language-support.F90" use prif, only : & prif_allocate_coarray, & prif_allocate, prif_deallocate, & prif_coarray_handle #if !defined(CAF_PRIF_VERSION) || CAF_PRIF_VERSION >= 7 ! PRIF 0.7+ deallocate use prif, only : prif_deallocate_coarray, prif_deallocate_coarrays # define prif_deallocate_coarray3 prif_deallocate_coarray # define prif_deallocate_coarrays3 prif_deallocate_coarrays #else ! emulate PRIF 0.7 deallocate with older interfaces use prif, only : prif_deallocate_coarray_ => prif_deallocate_coarray # define prif_deallocate_coarray(h) prif_deallocate_coarray_([h]) # define prif_deallocate_coarrays(arr) prif_deallocate_coarray_(arr) # define prif_deallocate_coarray3(h,a2,a3) prif_deallocate_coarray_([h],a2,a3) # define prif_deallocate_coarrays3(arr,a2,a3) prif_deallocate_coarray_(arr,a2,a3) #endif use iso_c_binding, only: & c_ptr, c_int, c_int64_t, c_size_t, c_intptr_t, & c_null_funptr, c_null_ptr, & c_associated, c_f_pointer, c_funloc, c_loc, c_sizeof #endif fortran-caffeine-0.7.2/test/prif_error_stop_test.F900000664000175000017500000000704615162221361022637 0ustar alastairalastairmodule prif_error_stop_test_m use unit_test_parameters_m, only : expected_error_stop_code, & image_one => subjob_setup, cmd_prefix => subjob_prefix, fpm_driver use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, usher & ,operator(.expect.), operator(.equalsExpected.), operator(//) implicit none private public :: prif_error_stop_test_t type, extends(test_t) :: prif_error_stop_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type integer, parameter :: max_message_len = 128 contains pure function subject() character(len=:), allocatable :: subject subject = "prif_error_stop" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_error_stop_test_t) prif_error_stop_test allocate(test_results, source = prif_error_stop_test%run([ & test_description_t("delivering a non-zero exitstat when the stop code is omitted", usher(exit_with_no_stop_code)) & ,test_description_t("printing a character stop code and delivering a non-zero exitstat", usher(exit_with_character_stop_code)) & ,test_description_t("printing an integer stop code and delivering the non-zero exitstat", usher(exit_with_integer_stop_code)) & ])) end function function exit_with_no_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status integer command_status character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_no_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example error_stop_with_no_code > /dev/null 2>&1" & ,wait = .true. & ,exitstat = exit_status & ,cmdstat = command_status & ,cmdmsg = command_message & ) diag = .expect. (exit_status /= 0) // command_message else diag = .true. end if end function function exit_with_integer_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status integer command_status character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_integer_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example error_stop_with_integer_code > /dev/null 2>&1" & ,wait = .true. & ,exitstat = exit_status & ,cmdstat = command_status & ,cmdmsg = command_message & ) diag = (exit_status .equalsExpected. expected_error_stop_code) // command_message else diag = .true. end if end function function exit_with_character_stop_code() result(diag) type(test_diagnosis_t) :: diag integer exit_status integer command_status character(len=max_message_len) command_message if (image_one()) then command_message = "exit_with_character_stop_code" call execute_command_line( & command = cmd_prefix//fpm_driver//" run --example error_stop_with_character_code > /dev/null 2>&1" & ,wait = .true. & ,exitstat = exit_status & ,cmdstat = command_status & ,cmdmsg = command_message & ) diag = .expect. (exit_status /= 0) // command_message else diag = .true. end if end function end module prif_error_stop_test_m fortran-caffeine-0.7.2/test/prif_this_image_test.F900000664000175000017500000000317415162221361022550 0ustar alastairalastairmodule prif_this_image_no_coarray_test_m use prif, only : prif_this_image_no_coarray, prif_num_images, prif_co_sum use julienne_m, only: & operator(//) & ,operator(.all.) & ,operator(.equalsExpected.) & ,usher & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_this_image_no_coarray_test_t type, extends(test_t) :: prif_this_image_no_coarray_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_this_image_no_coarray" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_this_image_no_coarray_test_t) prif_this_image_no_coarray_test allocate(test_results, source = prif_this_image_no_coarray_test%run([ & test_description_t("returning a unique member of {1,...,num_images()} when called without arguments", usher(check_this_image_set)) & ])) end function function check_this_image_set() result(diag) type(test_diagnosis_t) :: diag integer, allocatable :: image_numbers(:) integer i, me, ni call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=ni) allocate(image_numbers, source = [(merge(0, me, me/=i), i = 1, ni)]) call prif_co_sum(image_numbers) diag = .all. (image_numbers .equalsExpected. [(i, i = 1, ni)]) // "correct image set" end function end module prif_this_image_no_coarray_test_m fortran-caffeine-0.7.2/test/prif_coarray_inquiry_test.F900000664000175000017500000001153015162221361023652 0ustar alastairalastair#include "test-utils.F90" module prif_coarray_inquiry_test_m # include "test-uses-alloc.F90" use prif, only : & prif_coarray_handle, prif_num_images, & prif_local_data_pointer, prif_size_bytes, & prif_lcobound_no_dim, prif_lcobound_with_dim, & prif_ucobound_no_dim, prif_ucobound_with_dim, & prif_coshape use julienne_m, only: & operator(//) & ,operator(.all.) & ,operator(.also.) & ,operator(.equalsExpected.) & ,usher & ,string_t & ,test_description_t & ,test_diagnosis_t & ,test_result_t & ,test_t implicit none private public :: prif_coarray_inquiry_test_t type, extends(test_t) :: prif_coarray_inquiry_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "PRIF Coarray Inquiries" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_coarray_inquiry_test_t) prif_coarray_inquiry_test allocate(test_results, source = prif_coarray_inquiry_test%run([ & test_description_t("preserving the prif_local_data_pointer for an allocated coarray", usher(check_prif_local_data_pointer)) & ,test_description_t("checking passed cobounds", usher(check_cobounds)) & ])) end function function check_prif_local_data_pointer() result(diag) type(test_diagnosis_t) :: diag integer :: dummy_element type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocation_ptr, local_ptr call prif_allocate_coarray( & [integer(c_int64_t):: 1], & [integer(c_int64_t)::], & int(storage_size(dummy_element)/8, c_size_t), & c_null_funptr, & coarray_handle, & allocation_ptr) call prif_local_data_pointer(coarray_handle, local_ptr) diag = c_associated(local_ptr, allocation_ptr) call prif_deallocate_coarray(coarray_handle) end function impure elemental function check_cobound(corank, omit_trailing) result(diag) type(test_diagnosis_t) :: diag integer(c_int), intent(in) :: corank logical, intent(in) :: omit_trailing ! Allocate memory for an integer scalar coarray with given corank ! and then test some queries on it integer :: num_imgs, i integer(kind=c_int64_t), dimension(corank) :: lcobounds, ucobounds, tmp_bounds integer(kind=c_int64_t), dimension(corank-1) :: leading_ucobounds integer(kind=c_int64_t) :: tmp_bound integer(kind=c_size_t), dimension(corank) :: sizes type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_size_t) :: data_size, query_size diag = .true. call prif_num_images(num_images=num_imgs) lcobounds(1) = 1 ucobounds(1) = num_imgs do i = 2,corank lcobounds(i) = i ucobounds(i) = i + merge(1,0,mod(i,2)==0) end do allocated_memory = c_null_ptr data_size = 64 * corank if (omit_trailing) then leading_ucobounds = ucobounds(1:corank-1) call prif_allocate_coarray( lcobounds, leading_ucobounds, data_size, c_null_funptr, & coarray_handle, allocated_memory) else call prif_allocate_coarray( lcobounds, ucobounds, data_size, c_null_funptr, & coarray_handle, allocated_memory) end if if (corank > 1) ucobounds(corank) = lcobounds(corank) ! trailing ucobound gets rounded down ALSO(c_associated(allocated_memory)) call prif_size_bytes(coarray_handle, data_size=query_size) ALSO2(query_size .equalsExpected. data_size, "prif_size_bytes is valid") call prif_lcobound_no_dim(coarray_handle, tmp_bounds) ALSO2(.all. (tmp_bounds .equalsExpected. lcobounds), "prif_lcobound_no_dim is valid") call prif_ucobound_no_dim(coarray_handle, tmp_bounds) ALSO2(.all. (tmp_bounds .equalsExpected. ucobounds), "prif_ucobound_no_dim is valid") do i = 1, corank call prif_lcobound_with_dim(coarray_handle, i, tmp_bound) ALSO2(tmp_bound .equalsExpected. lcobounds(i), "prif_lcobound_with_dim is valid") call prif_ucobound_with_dim(coarray_handle, i, tmp_bound) ALSO2(tmp_bound .equalsExpected. ucobounds(i), "prif_ucobound_with_dim is valid") end do call prif_coshape(coarray_handle, sizes) ALSO2(.all. ((ucobounds - lcobounds + 1) .equalsExpected. sizes), "prif_coshape is valid") call prif_deallocate_coarray(coarray_handle) end function function check_cobounds() result(diag) type(test_diagnosis_t) :: diag integer(c_int) :: corank diag = .true. ALSO(.all. check_cobound([(corank, corank = 1_c_int, 15_c_int)], .false.)) ALSO(.all. check_cobound([(corank, corank = 1_c_int, 15_c_int)], .true.)) end function end module prif_coarray_inquiry_test_m fortran-caffeine-0.7.2/test/prif_image_index_test.F900000664000175000017500000003764515162221361022722 0ustar alastairalastair#include "test-utils.F90" module prif_image_index_test_m # include "test-uses-alloc.F90" use prif, only: & prif_image_index, prif_num_images, & prif_team_type, prif_get_team, & prif_this_image_no_coarray, & prif_form_team, prif_change_team, prif_end_team, & prif_image_index_with_team, prif_image_index_with_team_number, & prif_initial_team_index, prif_initial_team_index_with_team, prif_initial_team_index_with_team_number, & prif_this_image_with_coarray, prif_this_image_with_dim, & prif_lcobound_no_dim, prif_ucobound_no_dim, & prif_num_images_with_team, PRIF_INITIAL_TEAM use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, string_t, usher & ,operator(.also.), operator(.equalsExpected.), operator(.isAtLeast.), operator(.isAtMost.), operator(//) implicit none private public :: prif_image_index_test_t type, extends(test_t) :: prif_image_index_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() character(len=:), allocatable :: subject subject = "prif_image_index and prif_initial_team_index" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_image_index_test_t) prif_image_index_test allocate(test_results, source = prif_image_index_test%run([ & test_description_t("returning 1 for the simplest case", usher(check_simple_case)) & ,test_description_t("returning 1 when given the lower bounds", usher(check_lower_bounds)) & ,test_description_t("returning 0 with invalid subscripts", usher(check_invalid_subscripts)) & ,test_description_t("returning the expected answer for a more complicated case w/corank=2", usher(check_complicated_2d)) & ,test_description_t("returning the expected answer for a more complicated case w/corank=3", usher(check_complicated_3d)) & ,test_description_t("returning the expected answer with a child team and corank=2", usher(check_complicated_2d_team)) & ])) end function function check_this_image_coarray(coarray_handle, corank, team) result(diag) type(prif_coarray_handle) :: coarray_handle integer(c_int) :: corank type(prif_team_type), optional :: team type(test_diagnosis_t) :: diag integer(c_int64_t) :: co, cosubscripts(corank), colbound(corank), coubound(corank) integer(c_int) :: i, me, me_initial type(prif_team_type) :: initial_team diag = .true. call prif_get_team(PRIF_INITIAL_TEAM, team=initial_team) call prif_lcobound_no_dim(coarray_handle, colbound) call prif_ucobound_no_dim(coarray_handle, coubound) call prif_this_image_no_coarray(team, me) call prif_this_image_no_coarray(initial_team, me_initial) call prif_this_image_with_coarray(coarray_handle, team=team, cosubscripts=cosubscripts) do i=1,corank call prif_this_image_with_dim(coarray_handle, dim=i, team=team, cosubscript=co) ALSO(co .equalsExpected. cosubscripts(i)) ALSO(co .isAtLeast. colbound(i)) if (i /= corank) ALSO(co .isatMost. coubound(i)) ! trailing will differ with team end do ! verify reverse mapping if (present(team)) then call prif_image_index_with_team(coarray_handle, cosubscripts, team, i) else call prif_image_index(coarray_handle, cosubscripts, i) end if ALSO(i .equalsExpected. me) ! and prif_initial_team_index if (present(team)) then call prif_initial_team_index_with_team(coarray_handle, cosubscripts, team, i) else call prif_initial_team_index(coarray_handle, cosubscripts, i) end if ALSO(i .equalsExpected. me_initial) end function function check_simple_case() result(diag) type(test_diagnosis_t) :: diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer diag = .true. call prif_allocate_coarray( & lcobounds = [1_c_int64_t], & ucobounds = [integer(c_int64_t)::], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call prif_image_index(coarray_handle, [1_c_int64_t], image_index=answer) ALSO(answer .equalsExpected. 1_c_int ) call prif_initial_team_index(coarray_handle, [1_c_int64_t], initial_team_index=answer) ALSO(answer .equalsExpected. 1_c_int) ALSO(check_this_image_coarray(coarray_handle, 1)) call prif_deallocate_coarray(coarray_handle) end function function check_lower_bounds() result(diag) type(test_diagnosis_t) :: diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer diag = .true. call prif_allocate_coarray( & lcobounds = [2_c_int64_t, 3_c_int64_t], & ucobounds = [3_c_int64_t], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call prif_image_index(coarray_handle, [2_c_int64_t, 3_c_int64_t], image_index=answer) ALSO(answer .equalsExpected. 1_c_int) call prif_initial_team_index(coarray_handle, [2_c_int64_t, 3_c_int64_t], initial_team_index=answer) ALSO(answer .equalsExpected. 1_c_int) ALSO(check_this_image_coarray(coarray_handle, 2)) call prif_deallocate_coarray(coarray_handle) end function function check_invalid_subscripts() result(diag) type(test_diagnosis_t) diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer diag = .true. call prif_allocate_coarray( & lcobounds = [-2_c_int64_t, 2_c_int64_t], & ucobounds = [2_c_int64_t], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call prif_image_index(coarray_handle, [-1_c_int64_t, 1_c_int64_t], image_index=answer) ALSO(answer .equalsExpected. 0_c_int) ALSO(check_this_image_coarray(coarray_handle, 2)) call prif_deallocate_coarray(coarray_handle) end function function check_complicated_2d() result(diag) type(test_diagnosis_t) :: diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer, ni, expected diag = .true. call prif_num_images(num_images=ni) call prif_allocate_coarray( & lcobounds = [1_c_int64_t, 2_c_int64_t], & ucobounds = [2_c_int64_t], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call prif_image_index(coarray_handle, [1_c_int64_t, 3_c_int64_t], image_index=answer) expected = merge(3_c_int,0_c_int,ni >= 3) ALSO(answer .equalsExpected. expected) if (expected > 0) then call prif_initial_team_index(coarray_handle, [1_c_int64_t, 3_c_int64_t], initial_team_index=answer) ALSO(answer .equalsExpected. expected) end if ALSO(check_this_image_coarray(coarray_handle, 2)) call prif_deallocate_coarray(coarray_handle) end function function check_complicated_3d() result(diag) type(test_diagnosis_t) diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer, ni, expected type(prif_team_type) :: initial_team diag = .true. call prif_get_team(team=initial_team) call prif_num_images_with_team(team=initial_team, num_images=ni) call prif_allocate_coarray( & lcobounds = [1_c_int64_t, 0_c_int64_t, 0_c_int64_t], & ucobounds = [2_c_int64_t, 1_c_int64_t], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) call prif_image_index_with_team(coarray_handle, & [2_c_int64_t, 1_c_int64_t, 1_c_int64_t], & team=initial_team, image_index=answer) expected = merge(8_c_int,0_c_int,ni >= 8) ALSO(answer .equalsExpected. expected) if (expected > 0) then call prif_initial_team_index_with_team(coarray_handle, & [2_c_int64_t, 1_c_int64_t, 1_c_int64_t], & team=initial_team, initial_team_index=answer) ALSO(answer .equalsExpected. expected) endif ALSO(check_this_image_coarray(coarray_handle, 3)) call prif_deallocate_coarray(coarray_handle) end function function check_complicated_2d_team() result(diag) type(test_diagnosis_t) diag type(prif_coarray_handle) :: coarray_handle type(c_ptr) :: allocated_memory integer(c_int) :: answer, ni, cni, me integer(c_int64_t) :: which_team type(prif_team_type) :: initial_team, child_team diag = .true. call prif_get_team(team=initial_team) call prif_num_images_with_team(team=initial_team, num_images=ni) call prif_this_image_no_coarray(this_image=me) call prif_allocate_coarray( & lcobounds = [0_c_int64_t, 2_c_int64_t], & ucobounds = [1_c_int64_t], & size_in_bytes = 1_c_size_t, & final_func = c_null_funptr, & coarray_handle = coarray_handle, & allocated_memory = allocated_memory) which_team = merge(2_c_int64_t, 1_c_int64_t, mod(me, 2) == 0) call prif_form_team(team_number = which_team, team = child_team) call prif_change_team(child_team) call prif_num_images_with_team(team=child_team, num_images=cni) ! image_index lcobound call prif_image_index_with_team(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & team=initial_team, image_index=answer) ALSO(answer .equalsExpected. 1_c_int) call prif_image_index_with_team_number(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & team_number=-1_c_int64_t, image_index=answer) ALSO(answer .equalsExpected. 1_c_int) call prif_image_index_with_team(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & team=child_team, image_index=answer) ALSO(answer .equalsExpected. 1_c_int) call prif_image_index_with_team_number(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & team_number=which_team, image_index=answer) ALSO(answer .equalsExpected. 1_c_int) call prif_image_index(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & image_index=answer) ALSO(answer .equalsExpected. 1_c_int) ! initial_team_index lcobound call prif_initial_team_index_with_team(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & initial_team, answer) ALSO(answer .equalsExpected. 1_c_int) call prif_initial_team_index_with_team_number(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & -1_c_int64_t, answer) ALSO(answer .equalsExpected. 1_c_int) call prif_initial_team_index_with_team(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & child_team, answer) ALSO(answer .equalsExpected. merge(1_c_int,2_c_int,which_team==1)) call prif_initial_team_index_with_team_number(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & which_team, answer) ALSO(answer .equalsExpected. merge(1_c_int,2_c_int,which_team==1)) call prif_initial_team_index(coarray_handle, & [0_c_int64_t, 2_c_int64_t], & answer) ALSO(answer .equalsExpected. merge(1_c_int,2_c_int,which_team==1)) ! image_index 3 call prif_image_index_with_team(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team=initial_team, image_index=answer) ALSO(answer .equalsExpected. merge(3_c_int,0_c_int,ni >= 3)) call prif_image_index_with_team_number(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team_number=-1_c_int64_t, image_index=answer) ALSO(answer .equalsExpected. merge(3_c_int,0_c_int,ni >= 3)) call prif_image_index_with_team(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team=child_team, image_index=answer) ALSO(answer .equalsExpected. merge(3_c_int,0_c_int,cni >= 3)) call prif_image_index_with_team_number(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team_number=which_team, image_index=answer) ALSO(answer .equalsExpected. merge(3_c_int,0_c_int,cni >= 3)) call prif_image_index(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & image_index=answer) ALSO(answer .equalsExpected. merge(3_c_int,0_c_int,cni >= 3)) ! initial_team_index 3 if (ni >= 3) then call prif_initial_team_index_with_team(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team=initial_team, initial_team_index=answer) ALSO(answer .equalsExpected. 3_c_int) call prif_initial_team_index_with_team_number(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team_number=-1_c_int64_t, initial_team_index=answer) ALSO(answer .equalsExpected. 3_c_int) end if if (cni >= 3) then call prif_initial_team_index_with_team(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team=child_team, initial_team_index=answer) ALSO(answer .equalsExpected. merge(5_c_int,6_c_int,which_team==1)) call prif_initial_team_index_with_team_number(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & team_number=which_team, initial_team_index=answer) ALSO(answer .equalsExpected. merge(5_c_int,6_c_int,which_team==1)) call prif_initial_team_index(coarray_handle, & [0_c_int64_t, 3_c_int64_t], & initial_team_index=answer) ALSO(answer .equalsExpected. merge(5_c_int,6_c_int,which_team==1)) end if ALSO(check_this_image_coarray(coarray_handle, 2, initial_team)) ALSO(check_this_image_coarray(coarray_handle, 2, child_team)) call prif_end_team() call prif_deallocate_coarray(coarray_handle) end function end module fortran-caffeine-0.7.2/test/prif_init_test.F900000664000175000017500000000367615162221361021411 0ustar alastairalastairmodule prif_init_test_m use prif, only : prif_init, PRIF_STAT_ALREADY_INIT use julienne_m, only: test_description_t, test_diagnosis_t, test_result_t, test_t, operator(.equalsExpected.), usher implicit none private public :: prif_init_test_t, check_caffeination type, extends(test_t) :: prif_init_test_t contains procedure, nopass, non_overridable :: subject procedure, nopass, non_overridable :: results end type contains pure function subject() result(test_subject) character(len=:), allocatable :: test_subject test_subject = "prif_init" end function function results() result(test_results) type(test_result_t), allocatable :: test_results(:) type(prif_init_test_t) prif_init_test allocate(test_results, source = prif_init_test%run([ & test_description_t("completing successfully", usher(check_caffeination)) & ,test_description_t("returning PRIF_STAT_ALREADY_INIT on a subsequent call ", usher(check_subsequent_prif_init_call)) & ])) end function function check_caffeination() result(diag) ! this test needs to run very early at startup, so we memoize the result type(test_diagnosis_t) :: diag type(test_diagnosis_t), save :: memo logical, save :: first_pass = .true. if (first_pass) then first_pass = .false. block #if HAVE_MULTI_IMAGE integer, parameter :: successful_initiation = PRIF_STAT_ALREADY_INIT #else integer, parameter :: successful_initiation = 0 #endif integer init_exit_code call prif_init(init_exit_code) memo = init_exit_code .equalsExpected. successful_initiation end block endif diag = memo end function function check_subsequent_prif_init_call() result(diag) type(test_diagnosis_t) :: diag integer stat call prif_init(stat) call prif_init(stat) diag = stat .equalsExpected. PRIF_STAT_ALREADY_INIT end function end module prif_init_test_m fortran-caffeine-0.7.2/README.md0000664000175000017500000002756215162221361016407 0ustar alastairalastairCaffeine ======== **CoArray Fortran Framework of Efficient Interfaces to Network Environments** Caffeine is a parallel runtime library that aims to support Fortran compilers with a programming-model-agnostic application interface to various communication libraries. Current work is on supporting the Parallel Runtime Interface for Fortran ([PRIF](#citing-prif-please-use-the-following-publication)) with the [GASNet-EX] exascale-ready networking middleware. The Caffeine team anticipates collaborating on separate alternative PRIF implementations using other communication substrates such as the Message Passing Interface ([MPI]). ![Caffeine system stack diagram](https://github.com/BerkeleyLab/caffeine/wiki/img/caffeine-stack.gif) Statement of need ----------------- The Fortran programming language standard added features supporting single-program, multiple-data (SPMD) parallel programming and loop parallelism beginning with Fortran 2008. In Fortran, SPMD programming involves the creation of a fixed number of images (instances) of a program that execute asynchronously in shared or distributed memory, except where a program uses specific synchronization mechanisms. Fortran's coarray feature provides distributed data structures that offer a subscripted, multidimensional array notation defining a partitioned global address space (PGAS). One image can use a coindexed notation to perform one-sided access of coarray data associated with another image. Fortran 2018 greatly expanded this feature set to include concepts such as teams (groupings) of images, events (counting semaphores), collective subroutines and failed-image detection (fault tolerance). Fortran 2023 provided additional, minor multi-image extensions, including notified remote data access. Several popular Fortran compilers, including [LLVM Flang](https://flang.llvm.org/docs/FortranStandardsSupport.html) and [LFortran](https://github.com/lfortran/lfortran), currently lack complete support for multi-image parallel execution. These features are a mandatory part of Fortran, and thus are an important part of reaching full compliance with the 2008, 2018, or 2023 revisions of the Fortran standard. The latest LLVM Flang 22 release adds experimental support for a [meaningful subset](https://flang.llvm.org/docs/FortranStandardsSupport.html#fortran-2018) of multi-image Fortran features using PRIF and Caffeine. For more details, see [LLVM-HPC2025 paper](#Additional-Publications) below. Caffeine provides a portable, high-performance and open-source parallel runtime library that compilers can target in code generation as part of their solution to support Fortran's multi-image parallel features. Prerequisites & Dependencies ------------- ### Build prerequisites The `install.sh` script uses the following packages: * Fortran and C compilers * We regularly test with: LLVM Flang versions 19:22 and GNU Fortran versions 13:15 * [Fortran package manager] `fpm` * [pkg-config] * [realpath] * [make] * [git] * [curl] The script will invoke these if present in a user's `PATH`. If not present, the script will ask permission to use [Homebrew] to install the relevant package or, in some cases, ask the user to install the package. ### Build dependencies Caffeine also depends on the following packages that will be automatically installed as part of the build process. * [GASNet-EX] exascale networking middleware * [assert](https://go.lbl.gov/assert) * [julienne](https://go.lbl.gov/julienne) Caffeine leverages the following non-parallel features of Fortran to simplify the writing of a portable, compact runtime-library that supports Fortran's parallel features: | Feature | Introduced in | |-------------------------------------------|---------------| | The `iso_c_binding` module | Fortran 2003 | | The `contiguous` attribute | Fortran 2008 | | Submodule support [1] | Fortran 2008 | | The `ISO_Fortran_binding.h` C header file | Fortran 2018 | | Assumed-type dummy arguments: `type(*)` | Fortran 2018 | | Assumed-rank dummy arguments: `array(..)` | Fortran 2018 | [1] This feature simplifies development but is not essential to the package Download, build, and run an example ----------------------------------- Here is an outline of the basic commands used to build Caffeine and run an example: ``` git clone https://github.com/BerkeleyLab/caffeine.git cd caffeine env FC= CC= CXX= ./install.sh env CAF_IMAGES=8 ./run-fpm.sh run --example hello ``` If `$FC` or `$CC` are unset, then `install.sh` will look for LLVM-based compilers in the `PATH`, and failing that, offer to install such compilers using Homebrew. The selected compilers MUST be "compatible": for the best experience you are HIGHLY recommended to specify the language frontends provided by a single version of a given compiler suite installation. The C++ compiler is optional for single-node deployments (and can be disabled using command-line option `--without-cxx`), but C++ is required for some network backends. *Note for macOS/Homebrew users*: Homebrew may be used to install [LLVM flang](https://formulae.brew.sh/formula/flang) for use with Caffeine, however by default Homebrew will not replace the incompatible Apple-provided `clang` in your `PATH`. Such users are recommended to use Homebrew's matching LLVM C/C++ compilers (usually installed in `/opt/homebrew/opt/llvm/bin`) to ensure that a matching set of LLVM compilers are used. The `install.sh` recognizes a number of command-line options and environment variables to customize behavior for your system. See the output of `./install.sh --help` for full documentation, including options for how to build for a distributed-memory platform. Example Usage ------------- The Caffeine parallel runtime is intended as an embedded compilation target library, to provide multi-image parallel runtime support to a Fortran compiler. As such, real usage of Caffeine is specific to the host Fortran compiler, and one should consult compiler-provided documentation regarding the use of Caffeine to back multi-image features. However we provide an [example hello world program](example/hello.F90), written in Fortran, simulating the PRIF calls that a theoretical source-to-source Fortran compiler might generate for a simple program written using Fortran's multi-image features to print a message from each image. Run tests --------- After installation, one can optionally issue the following command to run Caffeine's correctness unit tests to exercise the PRIF subroutines: ``` ./run-fpm.sh test ``` Note that some unit tests are conditionally skipped based on platform and configuration details. Recognized Environment Variables -------------------------------- The following environment variables control the execution of the `fpm`-driven Caffeine unit test suite: * `CAF_IMAGES`: integer that indicates the number of images to run * `SUBJOB_PREFIX`: command prefix to use for recursive `fpm` invocations in the test suite. Set `SUBJOB_PREFIX=skip` to disable such invocations (recommended for distributed-memory systems). The following environment variables control the behavior of the Caffeine library: * `CAF_HEAP_SIZE=128MB`: set the size of the shared-memory heap used for coarray storage, defaults to 128 MiB * `CAF_COMP_FRAC=0.10`: set the fraction of the shared-memory heap reserved for non-symmetric allocation, defaults to 10% Caffeine is built atop the [GASNet-EX] exascale networking middleware, which has its own set of environment variable knobs to control network-level behavior. Here are *a few* of the most useful GASNet knobs: * `GASNET_VERBOSEENV=1`: enable console output of all the envvar settings affecting GASNet operation * `GASNET_SPAWN_VERBOSE=1`: enable verbose console output of parallel job-spawning steps * `GASNET_BACKTRACE=1`: enable automatic backtrace upon fatal errors * `GASNET_SSH_SERVERS="host1 host2"`: space-deliminted list of hostnames for distributed-memory job launch using the ssh-spawner See [GASNet documentation](https://gasnet.lbl.gov/dist-ex/README) for full details on all settings. PRIF Implementation Status -------------------------- ### Caffeine is an implementation of the [Parallel Runtime Interface for Fortran (PRIF)](#citing-prif-please-use-the-following-publication) For details on the PRIF features that are implemented, please see the [Implementation Status doc](docs/implementation-status.md). Publications ------------ ### Citing Caffeine? Please use the following publication: Damian Rouson, Dan Bonachea. "[**Caffeine: CoArray Fortran Framework of Efficient Interfaces to Network Environments**](https://github.com/BerkeleyLab/caffeine/wiki/pubs/Caffeine_for_LLVM-2022.pdf)", Proceedings of the [Eighth Annual Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC2022)](https://web.archive.org/web/20230605003029/https://llvm-hpc-2022-workshop.github.io/), November 2022. Paper: [Talk Slides](https://github.com/BerkeleyLab/caffeine/wiki/pubs/Caffeine_for_LLVM-2022-Slides.pdf) ### Citing PRIF? Please use the following publication: Dan Bonachea, Katherine Rasmussen, Brad Richardson, Damian Rouson. "[**Parallel Runtime Interface for Fortran (PRIF): A Multi-Image Solution for LLVM Flang**](https://github.com/BerkeleyLab/caffeine/wiki/pubs/LLVM-HPC24_PRIF.pdf)", Proceedings of the [Tenth Annual Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC2024)](https://web.archive.org/web/20241006163246/https://llvm-hpc-2024-workshop.github.io/), November 2024. Paper: [Talk Slides](https://github.com/BerkeleyLab/caffeine/wiki/pubs/LLVM-HPC24_PRIF_Slides.pdf) ### PRIF Specification: PRIF Committee. "[**Parallel Runtime Interface for Fortran (PRIF) Specification, Revision 0.7**](https://github.com/BerkeleyLab/caffeine/wiki/pubs/PRIF_0.7.pdf)", Lawrence Berkeley National Laboratory Technical Report (LBNL-2001721), Dec 2025. ### Additional Publications: Dan Bonachea, Katherine Rasmussen, Damian Rouson, Jean-Didier Pailleux, Etienne Renault, Brad Richardson. "[**Lowering and Runtime Support for Fortran's Multi-Image Parallel Features using LLVM Flang, PRIF, and Caffeine**](https://github.com/BerkeleyLab/caffeine/wiki/pubs/LLVM-HPC25_FlangPRIFCaffeine.pdf)", Proceedings of the [Eleventh Annual Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC2025)](https://llvm-in-hpc-workshop.github.io/LLVM-HPC-2025-Workshop.github.io/), November 2025. Paper: [Talk Slides](https://github.com/BerkeleyLab/caffeine/wiki/pubs/LLVM-HPC25_FlangPRIFCaffeine_Slides.pdf) Funding ------- The Computer Languages and Systems Software ([CLaSS]) Group at [Berkeley Lab] has developed Caffeine on funding from the Exascale Computing Project ([ECP](https://www.exascaleproject.org)) and the Stewardship for Programming Systems and Tools ([S4PST](https://s4pst.org)) project, part of the Consortium for the Advancement of Scientific Software ([CASS](https://cass.community/)). Support and Licensing --------------------- See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on reporting defects, feature requests and contributing to Caffeine. See [LICENSE.txt](LICENSE.txt) for usage terms and conditions. [GASNet-EX]: https://gasnet.lbl.gov [CLaSS]: https://go.lbl.gov/class [Berkeley Lab]: https://lbl.gov [MPI]: https://www.mpi-forum.org [Homebrew]: https://brew.sh [Fortran package manager]: https://github.com/fortran-lang/fpm [pkg-config]: https://www.freedesktop.org/wiki/Software/pkg-config/ [realpath]: https://man7.org/linux/man-pages/man3/realpath.3.html [make]: https://www.gnu.org/software/make/ [git]: https://git-scm.com [curl]: https://curl.se fortran-caffeine-0.7.2/example/0000775000175000017500000000000015162221361016547 5ustar alastairalastairfortran-caffeine-0.7.2/example/hello.F900000664000175000017500000000132515162221361020133 0ustar alastairalastairprogram hello_world use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_this_image_no_coarray & ,prif_num_images & ,prif_stop & ,prif_error_stop & ,PRIF_STAT_ALREADY_INIT implicit none integer :: init_exit_code, me, num_imgs logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then call prif_error_stop(quiet=false, stop_code_char="program startup failed") end if call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=num_imgs) print *, "Hello from image", me, "of", num_imgs call prif_stop(quiet=false) end program fortran-caffeine-0.7.2/example/support-test/0000775000175000017500000000000015162221361021240 5ustar alastairalastairfortran-caffeine-0.7.2/example/support-test/stop_with_integer_code.F900000664000175000017500000000106615162221361026252 0ustar alastairalastairprogram stop_with_integer_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop use unit_test_parameters_m, only : expected_stop_code implicit none integer init_exit_code logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) call prif_stop(quiet=false, stop_code_int=expected_stop_code) ! a prif_stop unit test passes if this line executes normal termination call prif_error_stop(quiet=false) ! a prif_stop unit test fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/out_of_memory.F900000664000175000017500000000310015162221361024375 0ustar alastairalastairprogram out_of_memory use iso_c_binding, only: c_bool, c_size_t, c_ptr, c_null_funptr, c_int64_t use prif implicit none integer :: init_exit_code, me, i integer(c_size_t) :: size_in_bytes = ishft(500_c_size_t, 40) type(c_ptr) :: allocated_memory logical :: coarray = .false. character(len=256) :: arg call prif_init(init_exit_code) if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then call prif_error_stop(quiet=.false._c_bool, stop_code_char="program startup failed") end if call prif_this_image_no_coarray(this_image=me) do i = 1, command_argument_count() call get_command_argument(i, arg) if (trim(arg) == "--coarray" .or. trim(arg) == "-c") then coarray = .true. else read(arg, *) size_in_bytes end if end do if (coarray) then if (me == 1) print *, "prif_allocate_coarray: ", size_in_bytes, " bytes" block integer(c_int64_t), dimension(1) :: lcobounds, ucobounds integer :: num_imgs type(prif_coarray_handle) :: coarray_handle call prif_num_images(num_images=num_imgs) lcobounds(1) = 1 ucobounds(1) = num_imgs call prif_allocate_coarray( & lcobounds, ucobounds, size_in_bytes, c_null_funptr, & coarray_handle, allocated_memory) end block else if (me == 1) print *, "prif_allocate: ", size_in_bytes, " bytes" call prif_sync_all() call prif_allocate(size_in_bytes, allocated_memory) end if call prif_sync_all() call prif_error_stop(quiet=.false._c_bool, stop_code_char="test failed") end program fortran-caffeine-0.7.2/example/support-test/register_stop_callback.F900000664000175000017500000000172515162221361026232 0ustar alastairalastairprogram register_stop_callback use iso_c_binding, only: c_bool, c_int use prif, only : & prif_init, & prif_register_stop_callback, & prif_stop, & prif_stop_callback_interface implicit none integer init_exit_code logical(kind=c_bool), parameter :: false = .false._c_bool ! use of the pointer is unnecessary according to the standard, ! but gfortran complains without it procedure(prif_stop_callback_interface), pointer :: callback_ptr callback_ptr => callback call prif_init(init_exit_code) call prif_register_stop_callback(callback_ptr) call prif_stop(false) contains subroutine callback(is_error_stop, quiet, stop_code_int, stop_code_char) logical(c_bool), intent(in) :: is_error_stop, quiet integer(c_int), intent(in), optional :: stop_code_int character(len=*), intent(in), optional :: stop_code_char print *, "callback invoked" end subroutine end programfortran-caffeine-0.7.2/example/support-test/stop_with_character_code.F900000664000175000017500000000100515162221361026542 0ustar alastairalastairprogram stop_with_character_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop implicit none integer init_exit_code logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) call prif_stop(quiet=false, stop_code_char="USER_PROVIDED_STRING") ! a prif_stop unit test passes if this line executes normal termination call prif_error_stop(quiet=false) ! a prif_stop unit test fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/README.md0000664000175000017500000000063515162221361022523 0ustar alastairalastairTest Support ------------ The programs in this directory intentionally terminate to support the `stop` and `error stop` unit tests, which use Fortran's `execute_command_line` to run the programs in this directory and to check for the expected non-zero stop codes. Running the tests in this manner enables the tests to continue executing after the child process launched by `execute_command_line` terminates. fortran-caffeine-0.7.2/example/support-test/exit_case.F900000664000175000017500000000307015162221361023464 0ustar alastairalastairprogram hello_world use iso_c_binding, only: c_bool use iso_fortran_env, only: output_unit,error_unit use prif, only : & prif_init & ,prif_this_image_no_coarray & ,prif_num_images & ,prif_stop & ,prif_error_stop & ,prif_sync_all & ,PRIF_STAT_ALREADY_INIT implicit none integer :: init_exit_code, me, num_imgs, exitcase = 1 logical(kind=c_bool), parameter :: false = .false._c_bool, true = .true._c_bool character(len=256) :: arg_string call prif_init(init_exit_code) if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then call prif_error_stop(quiet=false, stop_code_char="program startup failed") end if call prif_this_image_no_coarray(this_image=me) call prif_num_images(num_images=num_imgs) if (command_argument_count() > 0) then call get_command_argument(1, arg_string) read(arg_string, *) exitcase end if if (me == 1) write(output_unit,*) "testing exit case ", exitcase call prif_sync_all() write(output_unit,'(A,I1,A,I1)') "stdout from image ", me, " of ", num_imgs write(error_unit,'(A,I1,A,I1)') "stderr from image ", me, " of ", num_imgs call prif_sync_all() select case (exitcase) case (1) call prif_stop(quiet=true, stop_code_int=exitcase+100) case (2) call prif_stop(quiet=false, stop_code_int=exitcase+100) case (3) if (me == num_imgs) call prif_error_stop(quiet=true, stop_code_int=exitcase+100) case default if (me == num_imgs) call prif_error_stop(quiet=false, stop_code_int=exitcase+100) end select call prif_sync_all() end program fortran-caffeine-0.7.2/example/support-test/stop_with_no_code.F900000664000175000017500000000070715162221361025232 0ustar alastairalastairprogram stop_with_no_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop implicit none integer init_exit_code logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) call prif_stop(false) ! a prif_stop test passes if this line executes normal termination call prif_error_stop(quiet=false) ! a prif_stop test fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/error_stop_with_integer_code.F900000664000175000017500000000112015162221361027452 0ustar alastairalastairprogram error_stop_with_integer_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop use unit_test_parameters_m, only : expected_error_stop_code implicit none integer init_exit_code logical(kind=c_bool), parameter :: true = .true._c_bool call prif_init(init_exit_code) call prif_error_stop(quiet=true, stop_code_int=expected_error_stop_code) ! a prif_error_stop unit test passes if this line executes error termination call prif_stop(quiet=true) ! a prif_error_stop unit tests fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/fail_image.F900000664000175000017500000000105315162221361023574 0ustar alastairalastairprogram fail_image use iso_c_binding, only: c_bool, c_int use prif, only : & prif_init & ,prif_num_images & ,prif_error_stop & ,prif_fail_image implicit none integer init_exit_code integer(c_int) :: ni logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) call prif_num_images(ni) if (ni > 1) PRINT *, "WARNING: This support test is only intended to be run with a single image" call prif_fail_image() call prif_error_stop(quiet=false) ! test fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/error_stop_with_character_code.F900000664000175000017500000000102615162221361027756 0ustar alastairalastairprogram error_stop_with_character_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop implicit none integer init_exit_code logical(kind=c_bool), parameter :: false = .false._c_bool call prif_init(init_exit_code) call prif_error_stop(quiet=false, stop_code_char="USER_PROVIDED_STRING") ! a prif_error_stop unit test passes if this line executes error termination call prif_stop(quiet=false) ! a prif_error_stop unit test fails if this line runs end program fortran-caffeine-0.7.2/example/support-test/error_stop_with_no_code.F900000664000175000017500000000076415162221361026446 0ustar alastairalastairprogram error_stop_with_no_code use iso_c_binding, only: c_bool use prif, only : & prif_init & ,prif_stop & ,prif_error_stop implicit none integer init_exit_code logical(kind=c_bool), parameter :: true = .true._c_bool call prif_init(init_exit_code) call prif_error_stop(quiet=true) ! a prif_error_stop unit test passes if this line correctly executes error termination call prif_stop(quiet=true) ! a prif_error_stop unit test fails if this line runs end program fortran-caffeine-0.7.2/CONTRIBUTING.md0000664000175000017500000001016415162221361017347 0ustar alastairalastairContributing to Caffeine -------------------------- ## Reporting Defects or Suggesting Features If you encounter problems or limitations when installing or using Caffeine, please do the following: 1. Search the Caffeine [issues](https://github.com/berkeleylab/caffeine/issues), including [closed issues]. If your search finds a report of the same problem, please post a comment in the issue. 2. Email the Caffeine [mailing list](mailto:fortran@lbl.gov) for advice. 3. If steps 1 or 2 do not resolve the problem, please file a [new issue] including - [ ] The Fortran compiler and compiler version used with Caffeine, - [ ] The complete output of the install and build commands run with `--verbose` argument, - [ ] The Caffeine version number or commit hash, - [ ] Any conditions required to reproduce the problem such as - [ ] The output of `uname -a` showing the operating system (OS), OS version, and processor architecture, - [ ] The number of images executed (e.g., the output of `echo $CAF_IMAGES`), - [ ] The command used to run your program (e.g., `./run-fpm.sh run`), and - [ ] A minimal reproducer: if possible, fewer than 50 lines demonstrating an issue. ## Contributing Code or Documentation We welcome help with diagnosing, isolating and fixing problems or adding features! All contributions are governed by the Caffeine [LICENSE.txt](./LICENSE.txt). To contribute, please follow these steps: - [ ] First please follow the [above steps](#reporting-defects-or-suggesting-features) and include a description of your proposed contribution. - [ ] Fork the Caffeine repository into your GitHub account - [ ] Create a new local branch for your work. - [ ] Name your branch according to the issue created. For example `fix-issue-53` or `issue-53-feature`. - [ ] Follow the coding conventions in [docs/README-maintainers.md](./docs/README-maintainers.md). - [ ] Make your commits logically atomic, self-consistent, and cohesive. - [ ] Add one or more unit tests in the `test` subdirectory to verify your fix or feature. - [ ] Ensure that your branch passes all tests (via `./run-fpm.sh test` with appropriate flags). - [ ] Update the [README.md](./README.md) if your branch affects anything described there. - [ ] Push your branch to your fork. - [ ] Open a [Pull Request](https://github.com/berkeleylab/caffeine/pulls) (PR) against an existing branch of the Berkeley Lab [Caffeine repository](https://github.com/berkeleylab/caffeine). - [ ] Please include the corresponding issue number in the PR title. - [ ] If your PR is not ready for merging, please click the downward arrow next to the "Create pull request" button and select the "Create draft pull request" option before submitting. - [ ] Watch for CI results on your PR and address any failures. - [ ] Please be patient and responsive to comments on your PR. ## Current and Past Contributors Caffeine is an open-source project and welcomes community participation in the development process. Notable current and past contributors include: * [Dan Bonachea](https://go.lbl.gov/dan-bonachea) [@bonachea](https://github.com/bonachea) [![View ORCID record] 0000-0002-0724-9349](https://orcid.org/0000-0002-0724-9349) * [Katherine Rasmussen](https://go.lbl.gov/katherine-rasmussen) [@ktras](https://github.com/ktras) [![View ORCID record] 0000-0001-7974-1853](https://orcid.org/0000-0001-7974-1853) * [Brad Richardson](https://everythingfunctional.com/) [@everythingfunctional](https://github.com/everythingfunctional) [![View ORCID record] 0000-0002-3205-2169](https://orcid.org/0000-0002-3205-2169) * [Damian Rouson](https://go.lbl.gov/damian-rouson) [@rouson](https://github.com/rouson) [![View ORCID record] 0000-0002-2344-868X](https://orcid.org/0000-0002-2344-868X) You can also browse the [full list of repository contributors](https://github.com/BerkeleyLab/caffeine/graphs/contributors). --- [Long or Frequently Used URLs]: # [View ORCID record]: https://github.com/BerkeleyLab/caffeine/wiki/img/ORCID-small.png [closed issues]: https://github.com/berkeleylab/caffeine/issues?q=is%3Aissue+is%3Aclosed [new issue]: https://github.com/berkeleylab/caffeine/issues/new fortran-caffeine-0.7.2/install.sh0000775000175000017500000004630215162221361017126 0ustar alastairalastair#!/bin/bash set -e # exit on error print_usage_info() { cat <<'EOF' Caffeine Installation Script USAGE: ./install.sh [--help | [--prefix=PREFIX] --help Display this help text --prefix=PREFIX Install library into 'PREFIX' directory --network= Build Caffeine to target given GASNet network conduit. should be one of: smp: single-node shared-memory conduit (default) udp: portable UDP/IP (for Ethernet networks) ibv: InfiniBand IB Verbs ofi: OpenFabrics Interfaces ucx: Unified Communication X --prereqs Display a list of prerequisite software. Default prefix='\$HOME/.local/bin' --verbose Show verbose build commands --yes Assume (yes) to all prompts for non-interactive build All unrecognized arguments will be passed to GASNet's configure. Some influential environment variables: FC Fortran compiler command FFLAGS Fortran compiler flags CC C compiler command CFLAGS C compiler flags CPP C preprocessor CPPFLAGS C preprocessor flags, e.g. -I if you have headers in a nonstandard directory LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l Use these variables to override the choices made by the installer or to help it to find libraries and programs with nonstandard names/locations. Report bugs to fortran@lbl.gov or at https://go.lbl.gov/caffeine EOF } GASNET_VERSION="2025.8.0" VERBOSE="" GASNET_CONDUIT="${GASNET_CONDUIT:-smp}" YES=false APPEND_CFLAGS="" list_prerequisites() { cat << EOF Caffeine and this installer were developed with the following prerequisites. If any are missing and if permission is granted, the installer will install the latest versions using Homebrew: LLVM flang GASNet-EX $GASNET_VERSION fpm git (used by fpm to clone dependencies) curl pkg-config realpath (Homebrew coreutils) GNU Make (Homebrew coreutils) EOF } # GASNET_CONFIGURE_ARGS is deliberately inherited from the caller environment GASNET_CONFIGURE_ARGS=${GASNET_CONFIGURE_ARGS:=} while [ "$1" != "" ]; do orig_arg="$1" PARAM=$(echo "$1" | awk -F= '{print $1}') VALUE=$(echo "$1" | awk -F= '{print $2}') case $PARAM in -h | --help) print_usage_info exit ;; --prereqs) list_prerequisites exit ;; --prefix) PREFIX=$VALUE ;; --network) GASNET_CONDUIT=$(tr '[:upper:]' '[:lower:]' <<< $VALUE) case $GASNET_CONDUIT in smp|udp|mpi|ibv|ofi|ucx) ;; *) echo "ERROR: Unrecognized --network=$GASNET_CONDUIT" print_usage_info exit 1 esac ;; --verbose) VERBOSE="--verbose" set -x ;; -y | --yes) YES="true" ;; *) # We pass the unmodified argument to GASNet configure # Quoting is believed sufficient for embedded whitespace but not quotes GASNET_CONFIGURE_ARGS+="${GASNET_CONFIGURE_ARGS+ }\"${orig_arg//\"/\\\"}\"" ;; esac shift done # Early check for pre-installed Homebrew BREW="${BREW:-brew}" if command -v "$BREW" > /dev/null 2>&1; then BREW_PREFIX=`$BREW --prefix || exit 0` if [ -z ${BREW_PREFIX:+x} ] || [ ! -d "$BREW_PREFIX" ] ; then echo Warning: Failed to detect Homebrew prefix BREW_PREFIX= fi fi if [ -z ${FC:+x} ] || [ -z ${CC:+x} ]; then if command -v flang > /dev/null 2>&1; then FC=`which flang` echo "Setting FC=$FC" if [ -n "$BREW_PREFIX" ] && [[ $FC =~ $BREW_PREFIX ]] ; then # We are using Homebrew flang, so prefer Homebrew clang/clang++ export PATH="$BREW_PREFIX/opt/llvm/bin:$PATH" fi fi if command -v clang > /dev/null 2>&1; then CC=`which clang` echo "Setting CC=$CC" fi fi if [ -n "$CC" ] && ! command -v "$CC" > /dev/null 2>&1; then echo "CC=$CC not found. If you don't yet have a C compiler, please leave environment variable CC unset." exit 1 fi if [ -n "$FC" ] && ! command -v "$FC" > /dev/null 2>&1; then echo "FC=$FC not found. If you don't yet have a Fortran compiler, please leave environment variable FC unset." exit 1 fi if [ -z ${CXX:+x} ] && [ -n "$CC" ] ; then # C++ is an optional dependency # try to auto-detect from CC if [[ $(basename $CC) =~ clang ]] ; then CXX_guess=clang++ else CXX_guess=g++ fi if [[ $CC =~ (-[0-9]+)$ ]] ; then CXX_guess=${CXX_guess}${BASH_REMATCH[0]} fi if command -v $CXX_guess > /dev/null 2>&1; then CXX=`which $CXX_guess` echo "Setting CXX=$CXX" fi fi set -u # error on use of undefined variable if command -v pkg-config > /dev/null 2>&1; then PKG_CONFIG=`which pkg-config` fi if command -v realpath > /dev/null 2>&1; then REALPATH=`which realpath` fi if command -v make > /dev/null 2>&1; then MAKE=`which make` fi if command -v fpm > /dev/null 2>&1; then FPM=`which fpm` fi if ! command -v git > /dev/null 2>&1; then echo "git not found. Building Caffeine requires fpm, which uses git to download dependencies." echo "Please install git, ensure it is in your PATH, and rerun ./install.sh" exit 1 fi if ! command -v curl > /dev/null 2>&1; then echo "curl not found. Please install curl, ensure it is in your PATH, and rerun ./install.sh" exit 1 fi ask_permission_to_use_homebrew() { cat << EOF Either one or more of the environment variables FC and CC are unset or one or more of the following packages are not in the PATH: pkg-config, realpath, make, fpm. If you grant permission to install prerequisites, you will be prompted before each installation. Press 'Enter' to choose the square-bracketed default answer: EOF printf "Is it ok to use Homebrew to install prerequisite packages? [yes] " } ask_permission_to_install_homebrew() { cat << EOF Homebrew not found. Installing Homebrew requires sudo privileges. If you grant permission to install Homebrew, you may be prompted to enter your password. Press 'Enter' to choose the square-bracketed default answer: EOF printf "Is it ok to download and install Homebrew? [yes] " } ask_permission_to_install_homebrew_package() { echo "" if [ ! -z ${2+x} ]; then echo "Homebrew installs $1 collectively in one package named '$2'." echo "" fi printf "Is it ok to use Homebrew to install $1? [yes] " } CI=${CI:-"false"} # GitHub Actions workflows set CI=true exit_if_user_declines() { if [ $YES = true ]; then echo " 'yes' assumed (--yes option)" return fi if [ $CI = true ]; then echo " 'yes' assumed (GitHub Actions workflow detected)" return fi read answer if [ -n "$answer" -a "$answer" != "y" -a "$answer" != "Y" -a "$answer" != "Yes" -a "$answer" != "YES" -a "$answer" != "yes" ]; then echo "Installation declined." case ${1:-} in *GASNet*) echo "Please ensure the $pkg.pc file is in $PKG_CONFIG_PATH and then rerun './install.sh'." ;; *FC*) echo "To use compilers other than Homebrew-installed LLVM flang and clang," echo "please set the FC and CC environment variables and rerun './install.sh'." ;; *) echo "Please ensure that $1 is installed and in your PATH and then rerun './install.sh'." ;; esac echo "Caffeine was not installed." exit 1 fi } DEPENDENCIES_DIR="build/dependencies" if [ ! -d $DEPENDENCIES_DIR ]; then mkdir -p $DEPENDENCIES_DIR fi if [ -z ${FC:+x} ] || [ -z ${CC:+x} ] || [ -z ${PKG_CONFIG:+x} ] || [ -z ${REALPATH:+x} ] || [ -z ${MAKE:+x} ] || [ -z ${FPM:+x} ] ; then ask_permission_to_use_homebrew exit_if_user_declines "brew" if ! command -v $BREW > /dev/null 2>&1; then ask_permission_to_install_homebrew exit_if_user_declines "brew" curl -L https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh -o $DEPENDENCIES_DIR/install-homebrew.sh --create-dirs chmod u+x $DEPENDENCIES_DIR/install-homebrew.sh if [ -p /dev/stdin ] && [ $CI = false ]; then cat << EOF Pipe detected. Installing Homebrew requires sudo privileges, which most likely will not work if you are installing non-interactively, e.g., via 'yes | ./install.sh'. To install Caffeine non-interactiely, please rerun the Caffeine installer after executing the following command to install Homebrew: "./$DEPENDENCIES_DIR/install-homebrew.sh" EOF exit 1 else ./$DEPENDENCIES_DIR/install-homebrew.sh rm $DEPENDENCIES_DIR/install-homebrew.sh fi if [ $(uname) = "Linux" ]; then BREW=/home/linuxbrew/.linuxbrew/bin/brew eval "$($BREW shellenv)" fi fi BREW_PREFIX=`$BREW --prefix || exit 0` if [ -z ${BREW_PREFIX:+x} ] || [ ! -d "$BREW_PREFIX" ] ; then echo Failed to detect Homebrew prefix echo 1 fi # fetch the latest package definitions: $BREW update if [ -z ${FC:+x} ] || [ -z ${CC:+x} ] ; then ask_permission_to_install_homebrew_package "'llvm' and 'flang'" exit_if_user_declines "FC" $BREW install llvm flang # Homebrew does not inject clang/clang++ into PATH on macOS export PATH="$BREW_PREFIX/opt/llvm/bin:$PATH" CC=`which clang` CXX=`which clang++` FC=`which flang-new` for tool in $CC $CXX $FC ; do if ! command -v $tool > /dev/null 2>&1 ; then echo Failed to detect Homebrew compiler install at $tool exit 1 fi done fi if [ -z ${REALPATH:+x} ] || [ -z ${MAKE:+x} ] ; then ask_permission_to_install_homebrew_package "'realpath' and 'make'" "coreutils" exit_if_user_declines "realpath and make" $BREW install coreutils REALPATH=`which realpath` MAKE=`which make` fi if [ -z ${PKG_CONFIG:+x} ]; then ask_permission_to_install_homebrew_package "'pkg-config'" exit_if_user_declines "pkg-config" $BREW install pkg-config PKG_CONFIG=`which pkg-config` fi if [ -z ${FPM:+x} ] ; then ask_permission_to_install_homebrew_package "'fpm'" exit_if_user_declines "fpm" $BREW install fpm FPM=`which fpm` fi fi PREFIX=${PREFIX:-"${HOME}/.local"} mkdir -p "$PREFIX" PREFIX=`$REALPATH "$PREFIX"` echo "PREFIX=$PREFIX" if [ -z ${PKG_CONFIG_PATH:+x} ]; then PKG_CONFIG_PATH="$PREFIX/lib/pkgconfig" else PKG_CONFIG_PATH="$PREFIX/lib/pkgconfig:$PKG_CONFIG_PATH" fi echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" FPM_FC="$($REALPATH $(command -v $FC))" if [[ $FPM_FC == *flang* ]]; then FPM_FC=${FPM_FC/flang-[1-9][0-9]*/flang-new} fi FPM_CC="$($REALPATH $(command -v $CC))" # workaround issue #228: clang cannot find Homebrew flang's C header if [ "${BREW_PREFIX:-unset}" != unset ] ; then if [[ $FPM_FC =~ flang ]] && [[ $FPM_FC =~ $BREW_PREFIX ]] ; then APPEND_CFLAGS="-I$(dirname $(find "$BREW_PREFIX/Cellar/flang" -name ISO_Fortran_binding.h | head -1))" fi fi ask_package_permission() { cat << EOF $1 not found in $2 Press 'Enter' for the square-bracketed default answer: EOF printf "Is it ok to download and install $1? [yes] " } pkg="gasnet-$GASNET_CONDUIT-seq" export PKG_CONFIG_PATH if ! $PKG_CONFIG $pkg ; then ask_package_permission "GASNet-EX" "PKG_CONFIG_PATH" exit_if_user_declines "GASNet-EX" GASNET_TAR_FILE="GASNet-$GASNET_VERSION.tar.gz" GASNET_SOURCE_URL="https://bitbucket.org/berkeleylab/gasnet/downloads/GASNet-$GASNET_VERSION.tar.gz" if [ ! -d $DEPENDENCIES_DIR ]; then mkdir -pv $DEPENDENCIES_DIR fi GASNET_DIR=$DEPENDENCIES_DIR/GASNet-$GASNET_VERSION if [ -d $GASNET_DIR ]; then # clean any existing GASNet build dir we are overwriting rm -Rf $GASNET_DIR fi curl -L $GASNET_SOURCE_URL | tar xvzf - -C $DEPENDENCIES_DIR ( cd $GASNET_DIR cmd="set -x ; ./configure --prefix=\"$PREFIX\"" # user-provided overrides: cmd="$cmd $GASNET_CONFIGURE_ARGS" # pass-thru compiler settings: cmd="$cmd --with-cc=\"$CC\" --with-cxx=\"$CXX\"" # select the GASNet config settings Caffeine requires, and disable unused features: cmd="$cmd --enable-$GASNET_CONDUIT" cmd="$cmd --enable-seq --disable-par --disable-parsync" cmd="$cmd --disable-segment-everything" # TEMPORARY: disable MPI compatibility until we figure out how to support in fpm cmd="$cmd --disable-mpi-compat" eval $cmd $MAKE -j 8 all $MAKE -j 8 install ) fi # if ! $PKG_CONFIG $pkg ; then exit_if_pkg_config_pc_file_missing() { if ! $PKG_CONFIG $1 ; then echo "$1.pc pkg-config file not found" exit 1 fi } exit_if_pkg_config_pc_file_missing "$pkg" GASNET_LDFLAGS="`$PKG_CONFIG $pkg --variable=GASNET_LDFLAGS`" GASNET_LIBS="`$PKG_CONFIG $pkg --variable=GASNET_LIBS`" GASNET_CC="`$PKG_CONFIG $pkg --variable=GASNET_CC`" GASNET_CFLAGS="`$PKG_CONFIG $pkg --variable=GASNET_CFLAGS`" GASNET_CPPFLAGS="`$PKG_CONFIG $pkg --variable=GASNET_CPPFLAGS`" # Check whether GASNet was installed using Spack. If yes, bail out. # Note: relies on the fact that most Spack installations have "opt/spack" # in the directory path, and assumes that the first directory returned # by pkg-config contains the GASNet lib directory GASNET_LIBDIR="$(echo $GASNET_LIBS | awk '{print $1};')" GASNET_LIBDIR=${GASNET_LIBDIR#-L} case "$GASNET_LIBDIR" in *spack* ) cat << EOF ***NOTICE***: The GASNet library built by Spack is ONLY intended for unit-testing purposes, and is generally UNSUITABLE FOR PRODUCTION USE. The RECOMMENDED way to build GASNet is as an embedded library as configured by the higher-level client runtime package (i.e. Caffeine), including system-specific configuration. Exiting install.sh EOF exit 1 ;; * ) GASNET_PREFIX=$(dirname $GASNET_LIBDIR) if [ ! -r "$GASNET_PREFIX/include/gasnetex.h" ] ; then echo "ERROR: Failed to detect GASNet install prefix from $GASNET_LIBS" exit 1 fi ;; esac # Strip compiler flags # Warning: This assumes the full path doesn't contain any spaces! GASNET_CC_STRIPPED="$(echo $GASNET_CC | awk '{print $1};')" GASNET_CC_REAL="$($REALPATH $GASNET_CC_STRIPPED)" if [ "$GASNET_CC_REAL" != "$FPM_CC" ]; then echo "GASNET_CC=$GASNET_CC_REAL" and "FPM_CC=$FPM_CC don't match" exit 1; fi FPM_TOML="fpm.toml" rm -f $FPM_TOML echo "# DO NOT EDIT OR COMMIT -- Created by caffeine/install.sh" > $FPM_TOML cat manifest/fpm.toml.template >> $FPM_TOML GASNET_LIB_LOCATIONS=`echo $GASNET_LIBS | awk '{locs=""; for(i = 1; i <= NF; i++) if ($i ~ /^-L/) {locs=(locs " " $i);}; print locs; }'` GASNET_LIB_NAMES=`echo $GASNET_LIBS | awk '{names=""; for(i = 1; i <= NF; i++) if ($i ~ /^-l/) {names=(names " " $i);}; print names; }' | sed 's/-l//g'` if [[ $GASNET_CONDUIT == "udp" ]] ; then GASNET_LIB_NAMES+=" stdc++" # udp-conduit requires C++ libraries fi FPM_TOML_LINK_ENTRY="link = [\"$(echo ${GASNET_LIB_NAMES} | sed 's/ /", "/g')\"]" echo "${FPM_TOML_LINK_ENTRY}" >> $FPM_TOML CAFFEINE_PC="$PREFIX/lib/pkgconfig/caffeine.pc" cat << EOF > $CAFFEINE_PC CAFFEINE_FPM_LDFLAGS=$GASNET_LDFLAGS $GASNET_LIB_LOCATIONS CAFFEINE_FPM_FC=$FPM_FC CAFFEINE_FPM_CC=$GASNET_CC CAFFEINE_FPM_CFLAGS=$GASNET_CFLAGS $GASNET_CPPFLAGS $APPEND_CFLAGS Name: caffeine Description: The CoArray Fortran Framework of Efficient Interfaces to Network Environments (Caffeine) implements the Parallel Runtime Interface for Fortran (PRIF), providing runtime support for multi-image features in modern Fortran compilers. URL: https://go.lbl.gov/caffeine Version: 0.7.2 EOF exit_if_pkg_config_pc_file_missing "caffeine" user_compiler_flags="${CPPFLAGS:-} ${FFLAGS:-}" compiler_version=$($FPM_FC --version) if [[ $compiler_version =~ 'flang' ]]; then compiler_flag="-g -O3" elif [[ $compiler_version =~ 'GNU Fortran' ]]; then compiler_flag="-g -O3 -ffree-line-length-0 -Wno-unused-dummy-argument" elif [[ $compiler_version =~ 'LFortran' ]]; then compiler_flag="-g -O3 --cpp" else # unknown compiler compiler_flag="-g -O2" echo "WARNING: Failed to detect a recognized Fortran compiler" fi # enable Assert's multi-image support with PRIF callbacks provided by libcaffeine compiler_flag+=" -DASSERT_MULTI_IMAGE -DASSERT_PARALLEL_CALLBACKS" # enable Julienne's multi-image support with PRIF callbacks provided by julienne-driver compiler_flag+=" -DHAVE_MULTI_IMAGE_SUPPORT -DJULIENNE_PARALLEL_CALLBACKS" if ! [[ "$user_compiler_flags " =~ -[DU]ASSERTIONS[=\ ] ]] ; then # default to enabling assertions, unless the command line sets a relevant flag compiler_flag+=" -DASSERTIONS" fi GASNET_CONDUIT_UPPER=$(tr '[:lower:]' '[:upper:]' <<<$GASNET_CONDUIT) compiler_flag+=" -DCAF_NETWORK_$GASNET_CONDUIT_UPPER" # Should come last to allow command-line overrides compiler_flag+=" $user_compiler_flags" case $GASNET_CONDUIT in ibv|ofi|ucx) GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-$GASNET_PREFIX/bin/gasnetrun_$GASNET_CONDUIT -n \${CAF_IMAGES:-2}}" ;; udp) GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-$GASNET_PREFIX/bin/amudprun -n \${CAF_IMAGES:-2}}" ;; mpi) GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-mpirun -n \${CAF_IMAGES:-2}}" ;; smp) GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-env GASNET_PSHM_NODES=\${CAF_IMAGES:-\${GASNET_PSHM_NODES:-}}}" ;; *) GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-}" ;; esac RUN_FPM_SH="run-fpm.sh" cat << EOF > $RUN_FPM_SH #!/bin/sh #-- DO NOT EDIT -- created by caffeine/install.sh fpm="${FPM}" FPM_DRIVER=\${FPM_DRIVER:-\`realpath \$0\`} export FPM_DRIVER fpm_sub_cmd=\$1; shift if echo "--help -help --version -version --list -list new update list clean publish" | grep -w -q -e "\$fpm_sub_cmd" ; then set -x exec \$fpm "\$fpm_sub_cmd" "\$@" elif echo "build test run install" | grep -w -q -e "\$fpm_sub_cmd" ; then sed -i.bak 's/^link = .*\$/$FPM_TOML_LINK_ENTRY/' $FPM_TOML rm -f $FPM_TOML.bak # issue 282: this is the only portable way to use sed -i if test -n "$GASNET_RUNNER_ARG" && echo "test run" | grep -w -q -e "\$fpm_sub_cmd" ; then set -- "--runner=$GASNET_RUNNER_ARG" "\$@" fi set -x exec \$fpm "\$fpm_sub_cmd" \\ --profile debug \\ --flag "$compiler_flag" \\ --compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_FC`" \\ --c-compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CC`" \\ --c-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CFLAGS`" \\ --link-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_LDFLAGS`" \\ "\$@" else echo "ERROR: Unrecognized fpm subcommand \$fpm_sub_cmd" \$fpm list exit 1 fi EOF chmod u+x $RUN_FPM_SH # for backwards-compatibility of instructions/scripting: ( cd build && ln -f -s ../$RUN_FPM_SH run-fpm.sh ) ./$RUN_FPM_SH build $VERBOSE LIBCAFFEINE_DST=libcaffeine-$GASNET_CONDUIT.a LIBCAFFEINE_SRC=$(./$RUN_FPM_SH install --list 2>/dev/null | grep libcaffeine | cut -d' ' -f2) if [ -z "$LIBCAFFEINE_SRC" ]; then echo "ERROR: Failed to detect libcaffeine.a from fpm" exit 1 else mkdir -p "$PREFIX/lib" cp -af "$LIBCAFFEINE_SRC" "$PREFIX/lib/$LIBCAFFEINE_DST" ln -sf "$LIBCAFFEINE_DST" "$PREFIX/lib/libcaffeine.a" fi cat << EOF ________________ Caffeine has been dispensed! ________________ Caffeine is now installed in $PREFIX To rebuild or to run tests or examples via the Fortran Package Manager (fpm) with the required compiler/linker flags, pass a fpm command to the run-fpm.sh script. For example, run the program example/hello.f90 as follows: ./$RUN_FPM_SH run --example hello EOF fortran-caffeine-0.7.2/docs/0000775000175000017500000000000015162221361016044 5ustar alastairalastairfortran-caffeine-0.7.2/docs/README-release.md0000664000175000017500000000621515162221361020745 0ustar alastairalastairREADME-release.md ======== Release Procedure for Caffeine ------------- 1. Nominate a Release Manager with primary responsibility for ensuring each step in this procedure is followed 2. Ensure there are no open issues marked with `release-blocker` label 3. Validate correctness testing has been performed across all supported systems and supported versions of external dependencies 4. Complete release related chores in repository files 1. Update all instances of the copyright year embedded in: [LICENSE.txt](../LICENSE.txt), [manifest/fpm.toml.template](../manifest/fpm.toml.template) 2. Update all instances of the release package version number embedded in: [manifest/fpm.toml.template](../manifest/fpm.toml.template), [install.sh](../install.sh) 3. Update the author list embedded in: [manifest/fpm.toml.template](../manifest/fpm.toml.template) 4. Review top-level [README.md](../README.md) and other user-facing documentation for any necessary changes 5. Update [docs/implementation-status.md](../docs/implementation-status.md) with current status 6. If the PRIF specification revision is changing, search and update all instances of the old revision, including `CAF_PRIF_VERSION_{MAJOR,MINOR}` in [language-support.F90](../include/language-support.F90) 7. Temporarily hardcode version of gasnet installer in [install.sh](../install.sh) as the last commit in the release. Set GASNET_VERSION flag to the latest gasnet release 5. Produce the ChangeLog 1. Create draft release on GitHub 2. Review/edit the automated ChangeLog 3. Add/update list of supported features/platforms 4. Add/update list of high-level changes since last release 5. Add/update list of known defects/limitations 6. Spell-check and proofread 6. Tag a release candidate. For example `git tag #.#.#-rc1`, then `git push origin #.#.#-rc1` 7. Compel several people to manually validate the release candidate on systems of interest and with compilers and compiler versions listed in README 1. When possible, test both on shared and distributed memory systems. 2. When testing on Perlmutter, use the following steps: 1. Build source and tests on the login node using the desired compiler 2. Get a dedicated node: `salloc -t 10 -N 2 -n 8 -q interactive -A PROJECT_ID -C cpu` 3. Launch the parallel job to run the tests: `env SUBJOB_PREFIX=skip GASNET_SPAWN_VERBOSE=1 GASNET_SUPERNODE_MAXSIZE=2 CAF_IMAGES=8 build/run-fpm.sh test --verbose` 8. Create annotated tag (only after release candidate has been checked by team members) For example `git tag -a #.#.# -m "release version #.#.#"`, then `git push origin #.#.#` 9. Publish the release 10. Post release chores 1. Git revert the commit that hardcoded the gasnet version or manually edit 2. Update patch number of the version number embedded in: [manifest/fpm.toml.template](../manifest/fpm.toml.template), [install.sh](../install.sh) Update to an odd number to indicate that the `main` branch is currently a snapshot of something that is beyond the offical release 3. Update the release procedure with any new steps or changes fortran-caffeine-0.7.2/docs/README-maintainers.md0000664000175000017500000000766315162221361021647 0ustar alastairalastairREADME-maintainers.md ======== Conventions for Git and Pull Requests ------------- This repository follows the [fork-and-pull](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/getting-started/about-collaborative-development-models#fork-and-pull-model) model of development. If you would like to contribute some changes, please [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks) this repository, push your proposed edits to a feature branch in your fork, and then [open a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests) against this repo when the changes are ready for review. This repository aims to maintain a strictly linear git history. In order to achieve this, please observe the following workflow: * After your PR has been approved, your feature branch will be rebased onto `origin/main` before merge. * In general you should try to avoid rebasing a non-draft PR with pending approvals until the last step before merge, because it complicates iterative review. * Only after your feature branch is up-to-date with `origin/main` may it then be merged into `main` with a merge commit. Additional git policies for the primary Caffeine repository: * Never force push to `main` * All code changes and non-trivial documentation changes require a pull request * No stray branches, except for rare cases of long-lived parallel development Conventions for code and commits in Caffeine ------------- [TODO: Improve wording of below bullets] * All Fortran filenames must have a `*.F90` suffix * Procedures with `prif_` prefix are public facing procedures * Procedures with `caf_` prefix are private procedures, internal to Caffeine, which are implemented in C * Procedures with neither of the above two prefixes are private procedures, internal to Caffeine, which are implemented in Fortran * C functions and global variables lacking a `caf_` prefix must be `static` * Identifiers named `image` and `rank` refer to processes. Any identifier named `image` represents the process as 1-based number (Fortran-style), while `rank` represents the process as 0-based number (C-style) (i.e. rank == image - 1) * When writing or making changes to BIND(C) interfaces, be vigilant when checking that the types and attributes of the arguments and return values are equivalent across the Fortran and C declarations * Avoid committing whitespace-only changes to source lines distant from meaningful PR changes. In particular disable source editor features that automatically reformat entire files. * If you absolutely must make whitespace-only changes to otherwise unmodified lines (for example, rewrapping the lines in documentation), please isolate those changes in a separate commit with a commit message explaining the lack of meaningful change. * Similarly if you need to move blocks of lines unchanged between distant locations or rename files, please also isolate those changes in a separate commit with a commit message explaining the lack of meaningful change. * Tab characters should NEVER appear in source code Conventions for test code ------------------------- * All significant features should have non-trivial correctness tests in `test/` * Every PRIF procedure must be invoked by at least one test * Correctness tests should aim to achieve complete code coverage of internal paths, and exercise any important corner-cases. * Test functions should return a Julienne `test_diagnosis_t` named `diag` - If a complete diagnosis can be computed using a single Fortran expression `expr`, then the statement `diag = expr` should appear near the end of the procedure. - Otherwise, the statement `diag = .true.` must appear as the first executable statement in the procedure, and the `ALSO/ALSO2` macros defined by [test-utils.F90](../test/test-utils.F90) should be invoked to build an incremental diagnosis. fortran-caffeine-0.7.2/docs/_config.yml0000664000175000017500000000003215162221361020166 0ustar alastairalastairtheme: jekyll-theme-caymanfortran-caffeine-0.7.2/docs/implementation-status.md0000664000175000017500000002046315162221361022741 0ustar alastairalastair# Implementation Status Caffeine is an implementation of the Parallel Runtime Interface for Fortran (PRIF). This document outlines the implementation status in Caffeine of the features defined in the [latest PRIF specification, revision 0.7](https://doi.org/10.25344/S46S3W). Caffeine contains interfaces for all of the PRIF procedures (except when stated otherwise below) and the symbols are linkable and callable, but some procedures will fail at runtime with an unimplemented error. For more details about the implementation of the various PRIF features, please see the following sections: - [Named Constants](#Named-Constants) - [Program Startup and Shutdown](#Program-Startup-and-Shutdown) - [Image Queries](#Image-Queries) - [Storage Management](#Storage-Management) - [Coarray Queries](#Coarray-Queries) - [Contiguous Coarray Access](#Contiguous-Coarray-Access) - [Strided Coarray Access](#Strided-Coarray-Access) - [SYNC Statements](#SYNC-Statements) - [Locks and Unlocks](#Locks-and-Unlocks) - [Critical](#Critical) - [Events and Notifications](#Events-and-Notifications) - [Teams](#teams) - [Collectives](#Collectives) - [Atomic Memory Operations](#Atomic-Memory-Operations) The priorites for feature implementation and addressing known defects are communicated by the labels in the Caffeine [issue tracker](https://github.com/BerkeleyLab/caffeine/issues). ## Named Constants Caffeine contains definitions for all of the PRIF-relevant constants from `ISO_FORTRAN_ENV` and for all of the PRIF-specific constants. Configuration settings (`CAF_IMPORT_*` preprocessor defines) can optionally be used to import selected constant values from `ISO_FORTRAN_ENV` of the hosting compiler. ## Program Startup and Shutdown | Procedure | Status | Notes | |-----------|--------|-------| | `prif_init` | **YES** | | | `prif_stop`, `prif_error_stop` | **YES** | | | `prif_fail_image` | **YES** | trivial implementation | | `prif_register_stop_callback` | **YES** | | --- ## Image Queries | Procedure | Status | Notes | |-----------|--------|-------| | `prif_num_images` | **YES** | | | `prif_num_images_with_team` | **YES** | | | `prif_num_images_with_team_number` | *partial* | no support for sibling teams | | `prif_this_image_no_coarray` | **YES** | | | `prif_this_image_with_coarray`, `prif_this_image_with_dim` | **YES** | | | `prif_failed_images` | **YES** | | | `prif_stopped_images` | **YES** | | | `prif_image_status` | **YES** | | --- ## Storage Management | Procedure | Status | Notes | |-----------|--------|-------| | `prif_allocate_coarray` | **YES** | includes ucobound relaxation expected in PRIF 0.8 | | `prif_allocate` | **YES** | | | `prif_deallocate_coarray` | **YES** | `final_func` support requires flang 20+ | | `prif_deallocate_coarrays` | **YES** | `final_func` support requires flang 20+ | | `prif_deallocate` | **YES** | | | `prif_alias_create` | **YES** | includes ucobound relaxation expected in PRIF 0.8 | | `prif_alias_destroy` | **YES** | | --- ## Coarray Queries | Procedure | Status | Notes | |-----------|--------|-------| | `prif_set_context_data`, `prif_get_context_data` | **YES** | | | `prif_size_bytes` | **YES** | | | `prif_lcobound_no_dim`, `prif_lcobound_with_dim` | **YES** | | | `prif_ucobound_no_dim`, `prif_ucobound_with_dim` | **YES** | | | `prif_coshape` | **YES** | | | `prif_local_data_pointer` | **YES** | | | `prif_image_index` | **YES** | | | `prif_image_index_with_team` | **YES** | | | `prif_image_index_with_team_number` | *partial* | no support for sibling teams | | `prif_initial_team_index` | **YES** | | | `prif_initial_team_index_with_team` | **YES** | | | `prif_initial_team_index_with_team_number` | *partial* | no support for sibling teams | --- ## Contiguous Coarray Access | Procedure | Status | Notes | |-----------|--------|-------| | `prif_get` | **YES** | | | `prif_get_indirect` | **YES** | | | `prif_put` | **YES** | | | `prif_put_indirect` | **YES** | | | `prif_put_with_notify` | **YES** | | | `prif_put_with_notify_indirect` | **YES** | | | `prif_put_indirect_with_notify` | **YES** | | | `prif_put_indirect_with_notify_indirect` | **YES** | | --- ## Strided Coarray Access | Procedure | Status | Notes | |-----------|--------|-------| | `prif_get_strided` | **YES** | | | `prif_get_strided_indirect` | **YES** | | | `prif_put_strided` | **YES** | | | `prif_put_strided_indirect` | **YES** | | | `prif_put_strided_with_notify` | **YES** | | | `prif_put_strided_with_notify_indirect` | **YES** | | | `prif_put_strided_indirect_with_notify` | **YES** | | | `prif_put_strided_indirect_with_notify_indirect` | **YES** | | --- ## SYNC Statements | Procedure | Status | Notes | |-----------|--------|-------| | `prif_sync_memory` | **YES** | | | `prif_sync_all` | **YES** | | | `prif_sync_images` | **YES** | | | `prif_sync_team` | **YES** | | --- ## Locks and Unlocks ### Support = no --- ## Critical ### Support = no --- ## Events and Notifications | Procedure | Status | Notes | |-----------|--------|-------| | `prif_event_post` | **YES** | | | `prif_event_post_indirect` | **YES** | | | `prif_event_wait` | **YES** | | | `prif_event_query` | **YES** | | | `prif_notify_wait` | **YES** | | --- ## Teams | Procedure | Status | Notes | |-----------|--------|-------| | `prif_form_team` | **YES** | | | `prif_get_team` | **YES** | | | `prif_team_number` | **YES** | | | `prif_change_team` | **YES** | | | `prif_end_team` | **YES** | | --- ## Collectives | Procedure | Status | Notes | |-----------|--------|-------| | `prif_co_broadcast` | **YES** | | | `prif_co_max` | **YES** | | | `prif_co_max_character` | **YES** | | | `prif_co_min` | **YES** | | | `prif_co_min_character` | **YES** | | | `prif_co_sum` | **YES** | | | `prif_co_reduce` | **YES** | | --- ## Atomic Memory Operations | Procedure | Status | Notes | |-----------|--------|-------| | `prif_atomic_add` | **YES** | | | `prif_atomic_add_indirect` | **YES** | | | `prif_atomic_and` | **YES** | | | `prif_atomic_and_indirect` | **YES** | | | `prif_atomic_or` | **YES** | | | `prif_atomic_or_indirect` | **YES** | | | `prif_atomic_xor` | **YES** | | | `prif_atomic_xor_indirect` | **YES** | | | `prif_atomic_cas_int` | **YES** | | | `prif_atomic_cas_int_indirect` | **YES** | | | `prif_atomic_cas_logical` | **YES** | | | `prif_atomic_cas_logical_indirect` | **YES** | | | `prif_atomic_fetch_add` | **YES** | | | `prif_atomic_fetch_add_indirect` | **YES** | | | `prif_atomic_fetch_and` | **YES** | | | `prif_atomic_fetch_and_indirect` | **YES** | | | `prif_atomic_fetch_or` | **YES** | | | `prif_atomic_fetch_or_indirect` | **YES** | | | `prif_atomic_fetch_xor` | **YES** | | | `prif_atomic_fetch_xor_indirect` | **YES** | | | `prif_atomic_define_int` | **YES** | | | `prif_atomic_define_int_indirect` | **YES** | | | `prif_atomic_define_logical` | **YES** | | | `prif_atomic_define_logical_indirect` | **YES** | | | `prif_atomic_ref_int` | **YES** | | | `prif_atomic_ref_int_indirect` | **YES** | | | `prif_atomic_ref_logical` | **YES** | | | `prif_atomic_ref_logical_indirect` | **YES** | | --- fortran-caffeine-0.7.2/.gitignore0000664000175000017500000000033515162221361017105 0ustar alastairalastair# fpm-generated build tree build # install.sh-generated fpm files fpm.toml run-fpm.sh # ford-generated documentation doc/html # executable programs a.out *.exe # compiler-generated intermediate files *.mod *.smod *.o fortran-caffeine-0.7.2/include/0000775000175000017500000000000015162221361016537 5ustar alastairalastairfortran-caffeine-0.7.2/include/language-support.F900000664000175000017500000000705015162221361022316 0ustar alastairalastair! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt #ifndef CAF_INCLUDED_LANGUAGE_SUPPORT #define CAF_INCLUDED_LANGUAGE_SUPPORT #ifdef __GNUC__ # define HAVE_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #endif #ifndef HAVE_SELECTED_LOGICAL_KIND ! Define whether the compiler supports standard intrinsic function selected_logical_kind(), ! a feature introduced in Fortran 2023 clause 16.9.182. #if defined(_CRAYFTN) || defined(NAGFOR) || defined(__flang__) || (HAVE_GCC_VERSION >= 150000) #define HAVE_SELECTED_LOGICAL_KIND 1 #else #define HAVE_SELECTED_LOGICAL_KIND 0 #endif #endif #ifndef HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY ! Define whether the compiler supports associating a procedure pointer dummy argument with an ! actual argument that is a valid target for the pointer dummy in a procedure assignment, a ! feature introduced in Fortran 2008 and described in Fortran 2023 clause 15.5.2.10 paragraph 5. #if defined(_CRAYFTN) || defined(__INTEL_COMPILER) || defined(NAGFOR) || defined(__flang__) || (HAVE_GCC_VERSION > 140200) # define HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY 1 #else # define HAVE_PROCEDURE_ACTUAL_FOR_POINTER_DUMMY 0 #endif #endif #ifndef HAVE_FINAL_FUNC_SUPPORT # if defined(__GFORTRAN__) && HAVE_GCC_VERSION < 160000 ! gfortran 14-15 defect prevents declaration of the coarray_cleanup interface: ! https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113338 ! reportedly fixed in gfortran 16 # define HAVE_FINAL_FUNC_SUPPORT 0 # elif defined(__flang__) && __flang_major__ < 20 ! also missing in flang before 20 # define HAVE_FINAL_FUNC_SUPPORT 0 # else # define HAVE_FINAL_FUNC_SUPPORT 1 # endif #endif ! ISO_FORTRAN_ENV constant value control: ! The following knobs influence Caffeine's choice of value for the named constants ! specified by PRIF for ISO_FORTRAN_ENV: ! * CAF_IMPORT_{ATOMIC,STAT,TEAM}_CONSTANTS: Import PRIF constant values ! of the given category from the compiler's ISO_FORTRAN_ENV ! * CAF_IMPORT_CONSTANTS: Provides a default value for each category-specific knob above, ! which take precedence ! NOTE: In all cases imported constant values are silently assumed to satisfy ! the requirements for the corresponding PRIF named constant. ! Additionally, the ATOMIC KIND constants are assumed to denote a 64-bit interoperable type #ifndef CAF_IMPORT_CONSTANTS # if defined(__flang__) # define CAF_IMPORT_CONSTANTS 1 # else # define CAF_IMPORT_CONSTANTS 0 # endif #endif #ifndef CAF_IMPORT_ATOMIC_CONSTANTS #define CAF_IMPORT_ATOMIC_CONSTANTS CAF_IMPORT_CONSTANTS #endif #ifndef CAF_IMPORT_STAT_CONSTANTS #define CAF_IMPORT_STAT_CONSTANTS CAF_IMPORT_CONSTANTS #endif #ifndef CAF_IMPORT_TEAM_CONSTANTS #define CAF_IMPORT_TEAM_CONSTANTS CAF_IMPORT_CONSTANTS #endif ! PRIF specification version override and control ! By default, Caffeine provides the latest ratified version of the PRIF specification. ! Clients can optionally define one of the FORCE_* macros below to force compliance ! with a different revision of the PRIF specification. These override settings are ! NOT officially supported and may be removed at any time without notice. #define CAF_PRIF_VERSION_MAJOR 0 #if FORCE_PRIF_0_5 # define CAF_PRIF_VERSION_MINOR 5 #elif FORCE_PRIF_0_6 # define CAF_PRIF_VERSION_MINOR 6 #elif FORCE_PRIF_0_7 # define CAF_PRIF_VERSION_MINOR 7 #elif FORCE_PRIF_0_8 # define CAF_PRIF_VERSION_MINOR 8 #else # define CAF_PRIF_VERSION_MINOR 7 #endif #define CAF_PRIF_VERSION (100 * CAF_PRIF_VERSION_MAJOR + CAF_PRIF_VERSION_MINOR) #endif fortran-caffeine-0.7.2/.github/0000775000175000017500000000000015162221361016454 5ustar alastairalastairfortran-caffeine-0.7.2/.github/workflows/0000775000175000017500000000000015162221361020511 5ustar alastairalastairfortran-caffeine-0.7.2/.github/workflows/build.yml0000664000175000017500000002767415162221361022353 0ustar alastairalastairname: Build on: [push, pull_request] defaults: run: shell: bash jobs: build: name: ${{ matrix.compiler }}-${{ matrix.version }} ${{ matrix.network }} (${{ matrix.os }}) ${{ matrix.label }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ macos-14, macos-15, macos-15-intel, macos-26, ubuntu-24.04 ] compiler: [ gfortran ] version: [ 13, 14, 15 ] network: [ smp ] include: # --- flang coverage --- - os: macos-14 compiler: flang version: 22 network: smp native_multi_image: 1 brew_via_install: 1 FFLAGS: -fcoarray - os: macos-15 compiler: flang version: 22 network: smp brew_via_install: 1 native_multi_image: 1 FFLAGS: -fcoarray - os: macos-15-intel compiler: flang version: 22 network: smp brew_via_install: 1 native_multi_image: 1 FFLAGS: -fcoarray - os: macos-26 compiler: flang version: 22 network: smp brew_via_install: 1 native_multi_image: 1 FFLAGS: -fcoarray # https://hub.docker.com/r/snowstep/llvm/tags - os: ubuntu-24.04 compiler: flang version: latest network: smp native_multi_image: 1 FFLAGS: -fcoarray container: snowstep/llvm:ubuntu-24.04-latest # https://hub.docker.com/r/phhargrove/llvm-flang/tags - os: ubuntu-24.04 compiler: flang version: 22 network: smp native_multi_image: 1 FFLAGS: -fcoarray container: phhargrove/llvm-flang:22.1.0-latest - os: ubuntu-24.04 compiler: flang version: 21 network: smp container: phhargrove/llvm-flang:21.1.0-latest - os: ubuntu-24.04 compiler: flang version: 20 network: smp container: phhargrove/llvm-flang:20.1.0-latest - os: ubuntu-24.04 compiler: flang version: 19 network: smp FFLAGS: -mmlir -allow-assumed-rank container: phhargrove/llvm-flang:19.1.1-latest # --- PRIF-old coverage --- - os: ubuntu-24.04 compiler: flang version: 21 network: smp label: PRIF-0.5 FFLAGS: -DFORCE_PRIF_0_5 container: phhargrove/llvm-flang:21.1.0-latest - os: ubuntu-24.04 compiler: flang version: 21 network: smp label: PRIF-0.6 FFLAGS: -DFORCE_PRIF_0_6 container: phhargrove/llvm-flang:21.1.0-latest # --- udp coverage for selected configs --- - os: macos-15 compiler: gfortran version: 14 network: udp - os: ubuntu-24.04 compiler: gfortran version: 15 network: udp - os: ubuntu-24.04 compiler: gfortran version: 14 network: udp - os: macos-15-intel compiler: flang version: 22 network: udp native_multi_image: 1 FFLAGS: -fcoarray - os: macos-15 compiler: flang version: 22 network: udp native_multi_image: 1 FFLAGS: -fcoarray - os: macos-26 compiler: flang version: 22 network: udp native_multi_image: 1 FFLAGS: -fcoarray - os: ubuntu-24.04 compiler: flang version: 20 network: udp container: phhargrove/llvm-flang:20.1.0-latest - os: ubuntu-24.04 compiler: flang version: 21 network: udp container: phhargrove/llvm-flang:21.1.0-latest - os: ubuntu-24.04 compiler: flang version: 22 network: udp native_multi_image: 1 FFLAGS: -fcoarray container: phhargrove/llvm-flang:22.1.0-latest - os: ubuntu-24.04 compiler: flang version: latest network: udp native_multi_image: 1 FFLAGS: -fcoarray container: snowstep/llvm:ubuntu-24.04-latest container: image: ${{ matrix.container }} env: COMPILER_VERSION: ${{ matrix.version }} FFLAGS: ${{ matrix.FFLAGS }} PREFIX: install GASNET_CONFIGURE_ARGS: --enable-rpath --enable-debug GASNET_SPAWN_VERBOSE: 1 GASNET_SPAWNFN: L GASNET_MASTERIP: 127.0.0.1 CAF_IMAGES: 8 steps: - name: Set gfortran variables if: matrix.compiler == 'gfortran' run: | set -x echo "FC=gfortran-${COMPILER_VERSION}" >> "$GITHUB_ENV" echo "CC=gcc-${COMPILER_VERSION}" >> "$GITHUB_ENV" echo "CXX=g++-${COMPILER_VERSION}" >> "$GITHUB_ENV" - name: Set flang variables if: ${{ matrix.compiler == 'flang' && !matrix.brew_via_install }} run: | set -x echo "FC=flang-new" >> "$GITHUB_ENV" echo "CC=clang" >> "$GITHUB_ENV" echo "CXX=clang++" >> "$GITHUB_ENV" - name: Set Caffeine variables run: | set -x # docker instances cannot handle high levels of subjob parallelism if test -n "${{ matrix.container }}"; then \ echo "SUBJOB_PREFIX=CAF_IMAGES=2" >> "$GITHUB_ENV" ; \ fi # disable shared-memory bypass with network=udp to simulate multi-node runs if test "${{ matrix.network }}" = "udp"; then \ echo "GASNET_SUPERNODE_MAXSIZE=1" >> "$GITHUB_ENV" ; \ fi # Turn some knobs for a compiler that natively uses PRIF for multi-image features: # HAVE_MULTI_IMAGE : controls app/native-multi-image and prif_init testing # HAVE_MULTI_IMAGE_SUPPORT : force-enable Julienne's multi-image support if (( ${{ matrix.native_multi_image }} )); then \ echo "FFLAGS=$FFLAGS -DHAVE_MULTI_IMAGE -DHAVE_MULTI_IMAGE_SUPPORT" >> "$GITHUB_ENV" ; \ fi - name: Checkout code uses: actions/checkout@v1 - name: Install Ubuntu Native Dependencies if: ${{ contains(matrix.os, 'ubuntu') && matrix.container == '' }} run: | set -x sudo apt update sudo apt install -y build-essential pkg-config make if [[ ${COMPILER_VERSION} < 15 ]] ; then \ sudo apt install -y gfortran-${COMPILER_VERSION} g++-${COMPILER_VERSION} ; \ else \ curl -L https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh -o install-homebrew.sh ; \ chmod +x install-homebrew.sh ; \ env CI=1 ./install-homebrew.sh ; \ HOMEBREW_PREFIX="/home/linuxbrew/.linuxbrew" ; \ ${HOMEBREW_PREFIX}/bin/brew install -v gcc@${COMPILER_VERSION} binutils ; \ ls -al ${HOMEBREW_PREFIX}/bin ; \ echo "PATH=${HOMEBREW_PREFIX}/bin:${PATH}" >> "$GITHUB_ENV" ; \ : Homebrew GCC@15 needs binutils 2.44+ ; \ HOMEBREW_BINUTILS=$(ls -d ${HOMEBREW_PREFIX}/Cellar/binutils/2.*/bin ) ; \ ls -al ${HOMEBREW_BINUTILS} ; \ echo "FFLAGS=${FFLAGS:+"$FFLAGS" }-B ${HOMEBREW_BINUTILS}" >> "$GITHUB_ENV" ; \ echo "CFLAGS=${CFLAGS:+"$CFLAGS" }-B ${HOMEBREW_BINUTILS}" >> "$GITHUB_ENV" ; \ echo "CXXFLAGS=${CXXFLAGS:+"$CXXFLAGS" }-B ${HOMEBREW_BINUTILS}" >> "$GITHUB_ENV" ; \ echo "LDFLAGS=${LDFLAGS:+"$LDFLAGS" }-B ${HOMEBREW_BINUTILS}" >> "$GITHUB_ENV" ; \ fi - name: Install Ubuntu Container Dependencies if: ${{ contains(matrix.os, 'ubuntu') && matrix.container != '' && !contains(matrix.container, 'phhargrove') }} run: | set -x apt update apt install -y build-essential pkg-config make git curl - name: Install macOS Dependencies if: ${{ contains(matrix.os, 'macos') && !matrix.brew_via_install }} run: | set -x brew update # fpm binary distribution for macOS requires gfortran shared libraries from gcc@12 brew install gcc@12 - name: Install LLVM flang on macOS if: ${{ contains(matrix.os, 'macos') && matrix.compiler == 'flang' && !matrix.brew_via_install }} run: | set -x brew install llvm@${COMPILER_VERSION} flang # Prepend homebrew clang to PATH: echo "PATH=$(brew --prefix)/opt/llvm/bin:${PATH}" >> "$GITHUB_ENV" - name: Setup FPM if: ${{ !matrix.brew_via_install }} uses: fortran-lang/setup-fpm@main with: github-token: ${{ secrets.GITHUB_TOKEN }} fpm-version: latest - name: Build FPM if: false run: | set -x export FPM_VERSION=0.12.0 curl --retry 5 -LOsS https://github.com/fortran-lang/fpm/releases/download/v$FPM_VERSION/fpm-$FPM_VERSION.F90 mkdir fpm-temp gfortran-14 -o fpm-temp/fpm fpm-$FPM_VERSION.F90 echo "PATH=${PWD}/fpm-temp:${PATH}" >> "$GITHUB_ENV" - name: Version info run: | echo == TOOL VERSIONS == echo Platform version info: uname -a if test -r /etc/os-release ; then grep -e NAME -e VERSION /etc/os-release ; fi if test -x /usr/bin/sw_vers ; then /usr/bin/sw_vers ; fi echo echo PATH="$PATH" for tool in ${FC} ${CC} ${CXX} fpm ; do if command -v $tool > /dev/null 2>&1 ; then ( echo ; set -x ; w=$(which $tool) ; ls -al $w ; ls -alhL $w ; $tool --version ) fi done - name: Build Caffeine (install.sh) run: | for var in FC CC CXX FFLAGS CPPFLAGS CFLAGS LDFLAGS LIBS GASNET_CONFIGURE_ARGS ; do \ eval echo "$var=\$$var"; done set -x ./install.sh --prefix=${PREFIX} --network=${{ matrix.network }} --verbose - name: Run examples run: | echo CAF_IMAGES=${CAF_IMAGES} set -x ./run-fpm.sh run --verbose --example hello - name: Run native multi-image test if: ${{ matrix.native_multi_image }} run: | set -x ; ./run-fpm.sh run --verbose 2>&1 | tee output ; \ test ${PIPESTATUS[0]} = 0 && \ ! grep -q "IEEE arithmetic exceptions signaled" output - name: Run unit tests run: | echo SUBJOB_PREFIX=${SUBJOB_PREFIX} while (( CAF_IMAGES > 0 )); do \ echo CAF_IMAGES=${CAF_IMAGES} ; \ ( set -x ; ./run-fpm.sh test --verbose -- -d ) ; \ sleep 1 ; \ CAF_IMAGES=$(( CAF_IMAGES / 2 )) ; \ done - name: Run exit/failure tests run: | echo CAF_IMAGES=${CAF_IMAGES} set -x ./run-fpm.sh run --verbose --example stop_with_no_code ( set +e ; ./run-fpm.sh run --verbose --example stop_with_integer_code ; test $? = 99 ) ( set +e ; ./run-fpm.sh run --verbose --example error_stop_with_integer_code ; test $? = 100 ) ( set +e ; \ export CAF_IMAGES=1; \ ./run-fpm.sh run --verbose --example fail_image 2>&1 | tee output ; \ test ${PIPESTATUS[0]} > 0 && grep -q "FAIL IMAGE" output \ ) ( set +e ; \ ./run-fpm.sh run --verbose --example out_of_memory 2>&1 | tee output ; \ test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \ ) ( set +e ; \ ./run-fpm.sh run --verbose --example out_of_memory -- --coarray 2>&1 | tee output ; \ test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \ ) unset GASNET_SPAWN_VERBOSE for ((i=1; i<=4; i++)); do \ (set +e ; \ ./run-fpm.sh run --verbose --example exit_case -- $i 2>&1 | tee output ; \ test ${PIPESTATUS[0]} = $((i + 100)) \ && grep -q "stdout from image $CAF_IMAGES" output \ && grep -q "stderr from image $CAF_IMAGES" output \ ) ; \ done fortran-caffeine-0.7.2/manifest/0000775000175000017500000000000015162221361016722 5ustar alastairalastairfortran-caffeine-0.7.2/manifest/fpm.toml.template0000664000175000017500000000077415162221361022223 0ustar alastairalastairname = "caffeine" version = "0.7.2" license = "BSD-3-Clause-LBNL" author = ["Damian Rouson", "Brad Richardson", "Katherine Rasmussen", "Dan Bonachea"] maintainer = "fortran@lbl.gov" copyright = "2021-2026 The Regents of the University of California, through Lawrence Berkeley National Laboratory" [dev-dependencies] assert = {git = "https://github.com/berkeleylab/assert.git", tag = "3.1.0"} julienne = {git = "https://github.com/berkeleylab/julienne.git", tag = "3.6.2"} [install] library = true [build]