Changeset 26805
- Timestamp:
- Apr 18, 2022, 11:37:08 PM (2 years ago)
- Location:
- ps/trunk/source/ps
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
ps/trunk/source/ps/Profile.cpp
r24530 r26805 1 /* Copyright (C) 202 1Wildfire Games.1 /* Copyright (C) 202 Wildfire Games. 2 2 * This file is part of 0 A.D. 3 3 * … … 28 28 #include "lib/timer.h" 29 29 30 #if OS_WIN && !defined(NDEBUG)31 # define USE_CRT_SET_ALLOC_HOOK32 #endif33 34 #if defined(__GLIBC__) && !defined(NDEBUG)35 //# define USE_GLIBC_MALLOC_HOOK36 # define USE_GLIBC_MALLOC_OVERRIDE37 # include <dlfcn.h>38 # include <malloc.h>39 # include "lib/sysdep/cpu.h"40 #endif41 42 30 #include <numeric> 43 31 … … 82 70 columns.push_back(ProfileColumn("calls/frame", 80)); 83 71 columns.push_back(ProfileColumn("msec/frame", 80)); 84 columns.push_back(ProfileColumn("mallocs/frame", 120));85 72 columns.push_back(ProfileColumn("calls/turn", 80)); 86 73 columns.push_back(ProfileColumn("msec/turn", 80)); 87 columns.push_back(ProfileColumn("mallocs/turn", 80));88 74 } 89 75 }; … … 161 147 double unlogged_time_frame = node->GetFrameTime(); 162 148 double unlogged_time_turn = node->GetTurnTime(); 163 double unlogged_mallocs_frame = node->GetFrameMallocs();164 double unlogged_mallocs_turn = node->GetTurnMallocs();165 149 CProfileNode::const_profile_iterator it; 166 150 … … 169 153 unlogged_time_frame -= (*it)->GetFrameTime(); 170 154 unlogged_time_turn -= (*it)->GetTurnTime(); 171 unlogged_mallocs_frame -= (*it)->GetFrameMallocs();172 unlogged_mallocs_turn -= (*it)->GetTurnMallocs();173 155 } 174 156 for (it = node->GetScriptChildren()->begin(); it != node->GetScriptChildren()->end(); ++it) … … 176 158 unlogged_time_frame -= (*it)->GetFrameTime(); 177 159 unlogged_time_turn -= (*it)->GetTurnTime(); 178 unlogged_mallocs_frame -= (*it)->GetFrameMallocs();179 unlogged_mallocs_turn -= (*it)->GetTurnMallocs();180 160 } 181 161 … … 185 165 { 186 166 unlogged_time_turn = 0.0; 187 unlogged_mallocs_turn = 0.0;188 167 } 189 168 190 169 if (col == 2) 191 170 sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", unlogged_time_frame * 1000.0f); 192 else if (col == 3) 193 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", unlogged_mallocs_frame); 194 else if (col == 5) 171 else if (col == 4) 195 172 sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", unlogged_time_turn * 1000.f); 196 else if (col == 6)197 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", unlogged_mallocs_turn);198 173 199 174 return CStr(buf); … … 213 188 break; 214 189 case 3: 215 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->Get FrameMallocs());190 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->Gets()); 216 191 break; 217 192 case 4: 218 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->GetTurnCalls());219 break;220 case 5:221 193 sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", child->GetTurnTime() * 1000.0f); 222 break;223 case 6:224 sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->GetTurnMallocs());225 194 break; 226 195 } … … 313 282 } 314 283 315 double CProfileNode::GetFrameMallocs() const316 {317 return average(mallocs_per_frame);318 }319 320 double CProfileNode::GetTurnMallocs() const321 {322 return average(mallocs_per_turn);323 }324 325 284 const CProfileNode* CProfileNode::GetChild( const char* childName ) const 326 285 { … … 384 343 time_turn_current = 0.0; 385 344 386 mallocs_per_frame.clear();387 mallocs_per_turn.clear();388 mallocs_frame_current = 0;389 mallocs_turn_current = 0;390 391 345 profile_iterator it; 392 346 for (it = children.begin(); it != children.end(); ++it) … … 400 354 calls_per_frame.push_back(calls_frame_current); 401 355 time_per_frame.push_back(time_frame_current); 402 mallocs_per_frame.push_back(mallocs_frame_current);403 356 404 357 calls_frame_current = 0; 405 358 time_frame_current = 0.0; 406 mallocs_frame_current = 0;407 359 408 360 profile_iterator it; … … 417 369 calls_per_turn.push_back(calls_turn_current); 418 370 time_per_turn.push_back(time_turn_current); 419 mallocs_per_turn.push_back(mallocs_turn_current);420 371 421 372 calls_turn_current = 0; 422 373 time_turn_current = 0.0; 423 mallocs_turn_current = 0;424 374 425 375 profile_iterator it; … … 430 380 } 431 381 432 // TODO: these should probably only count allocations that occur in the thread being profiled433 #if defined(USE_CRT_SET_ALLOC_HOOK)434 435 static long malloc_count = 0;436 static _CRT_ALLOC_HOOK prev_hook;437 438 static int crt_alloc_hook(int allocType, void* userData, size_t size, int blockType,439 long requestNumber, const unsigned char* filename, int lineNumber)440 {441 if (allocType == _HOOK_ALLOC && Threading::IsMainThread())442 ++malloc_count;443 444 if (prev_hook)445 return prev_hook(allocType, userData, size, blockType, requestNumber, filename, lineNumber);446 else447 return 1;448 }449 450 static void alloc_hook_initialize()451 {452 prev_hook = _CrtSetAllocHook(crt_alloc_hook);453 }454 455 static long get_memory_alloc_count()456 {457 return malloc_count;458 }459 460 #elif defined(USE_GLIBC_MALLOC_HOOK)461 462 // Set up malloc hooks to count allocations - see463 // http://www.gnu.org/software/libc/manual/html_node/Hooks-for-Malloc.html464 static intptr_t malloc_count = 0;465 static void *(*old_malloc_hook) (size_t, const void*);466 static std::mutex alloc_hook_mutex;467 static void *malloc_hook(size_t size, const void* UNUSED(caller))468 {469 // This doesn't really work across threads. The hooks are global variables, and470 // we have to temporarily unhook in order to call the real malloc, and during that471 // time period another thread may perform an unhooked (hence uncounted) allocation472 // which we will miss.473 474 // Two threads may execute the hook simultaneously, so we need to do the475 // temporary unhooking in a thread-safe way, so for simplicity we just use a mutex.476 std::lock_guard<std::mutex> lock(alloc_hook_mutex);477 ++malloc_count;478 __malloc_hook = old_malloc_hook;479 void* result = malloc(size);480 old_malloc_hook = __malloc_hook;481 __malloc_hook = malloc_hook;482 return result;483 }484 485 static void alloc_hook_initialize()486 {487 std::lock_guard<std::mutex> lock(alloc_hook_mutex);488 old_malloc_hook = __malloc_hook;489 __malloc_hook = malloc_hook;490 // (we don't want to bother hooking realloc and memalign, because if they allocate491 // new memory then they'll be caught by the malloc hook anyway)492 }493 /*494 It would be nice to do:495 __attribute__ ((visibility ("default"))) void (*__malloc_initialize_hook)() = malloc_initialize_hook;496 except that doesn't seem to work in practice, since something (?) resets the497 hook to NULL some time while loading the game, after we've set it here - so498 we just call malloc_initialize_hook once inside CProfileManager::Frame instead499 and hope nobody deletes our hook after that.500 */501 static long get_memory_alloc_count()502 {503 return malloc_count;504 }505 506 #elif defined(USE_GLIBC_MALLOC_OVERRIDE)507 508 static intptr_t alloc_count = 0;509 510 // We override the malloc/realloc/calloc/free functions and then use dlsym to511 // defer the actual allocation to the real libc implementation.512 // The dlsym call will (in glibc 2.9/2.10) call calloc once (to allocate an error513 // message structure), so we have a bootstrapping problem when trying to514 // get the first called function via dlsym. So we kludge it by returning a statically-allocated515 // buffer for the very first call to calloc after we've called dlsym.516 // This is quite hacky but it seems to just about work in practice...517 518 // TODO: KNOWN ISSUE: Use after free and infinite recursion519 // We assign the glibc free function to libc_free in our malloc/calloc function (with dlsym).520 // We did that in the free function before, but had to change it to work around the first problem described below.521 // It's not a good solution because some of the problems described here can reappear when the first522 // call to malloc/calloc changes and enters the function with a different state.523 //524 // Dl* functions (dlsym, dlopen etc.) store an error message internally if something fails.525 // Calling dlerror returns a pointer to this error message. Calling dlerror a second time or calling dlsym526 // causes it to free the internal storage for this error message.527 // This behaviour can cause two types of problems:528 //529 // 1. Infinite recursion due to free call530 // Problem occurs if: We use any of the dl* functions in our free function and free gets called with an internal531 // error message buffer allocated.532 // What happens: Our call to the dl* function causes another free-call insdie glibc which calls our free function533 // and can cause infinite recursion.534 //535 // 2. Use after free536 // Problem occurs if: An external library (or any other function) calls a dl* function that stores an internal537 // error string, then calls dlerror to receive the message and then calls any of our malloc/calloc/realloc/free fuctions.538 // Our function uses one of the dl* functions too. After calling our function, it tries to use the error message pointer539 // it got with dlerror before.540 // What happens: Our call to the dl* function will free the storage of the message and the pointer in the external library541 // becomes invalid. We get undefined behaviour if the extern library uses the error message pointer after that.542 543 544 static bool alloc_bootstrapped = false;545 static char alloc_bootstrap_buffer[32]; // sufficient for x86_64546 static bool alloc_has_called_dlsym = false;547 static void (*libc_free)(void*) = NULL;548 // (We'll only be running a single thread at this point so no need for locking these variables)549 550 //#define ALLOC_DEBUG551 552 void* malloc(size_t sz)553 {554 cpu_AtomicAdd(&alloc_count, 1);555 556 static void *(*libc_malloc)(size_t);557 if (libc_malloc == NULL)558 {559 alloc_has_called_dlsym = true;560 libc_malloc = (void *(*)(size_t)) dlsym(RTLD_NEXT, "malloc");561 }562 void* ret = libc_malloc(sz);563 #ifdef ALLOC_DEBUG564 printf("### malloc(%d) = %p\n", sz, ret);565 #endif566 567 if (libc_free == NULL)568 libc_free = (void (*)(void*)) dlsym(RTLD_NEXT, "free");569 570 return ret;571 }572 573 void* realloc(void* ptr, size_t sz)574 {575 cpu_AtomicAdd(&alloc_count, 1);576 577 static void *(*libc_realloc)(void*, size_t);578 if (libc_realloc == NULL)579 {580 alloc_has_called_dlsym = true;581 libc_realloc = (void *(*)(void*, size_t)) dlsym(RTLD_NEXT, "realloc");582 }583 void* ret = libc_realloc(ptr, sz);584 #ifdef ALLOC_DEBUG585 printf("### realloc(%p, %d) = %p\n", ptr, sz, ret);586 #endif587 return ret;588 }589 590 void* calloc(size_t nm, size_t sz)591 {592 cpu_AtomicAdd(&alloc_count, 1);593 594 static void *(*libc_calloc)(size_t, size_t);595 if (libc_calloc == NULL)596 {597 if (alloc_has_called_dlsym && !alloc_bootstrapped)598 {599 ENSURE(nm*sz <= ARRAY_SIZE(alloc_bootstrap_buffer));600 #ifdef ALLOC_DEBUG601 printf("### calloc-bs(%d, %d) = %p\n", nm, sz, alloc_bootstrap_buffer);602 #endif603 alloc_bootstrapped = true;604 return alloc_bootstrap_buffer;605 }606 alloc_has_called_dlsym = true;607 libc_calloc = (void *(*)(size_t, size_t)) dlsym(RTLD_NEXT, "calloc");608 }609 void* ret = libc_calloc(nm, sz);610 #ifdef ALLOC_DEBUG611 printf("### calloc(%d, %d) = %p\n", nm, sz, ret);612 #endif613 614 if (libc_free == NULL)615 libc_free = (void (*)(void*)) dlsym(RTLD_NEXT, "free");616 617 return ret;618 }619 620 void free(void* ptr)621 {622 // Might be triggered if free is called before any calloc/malloc calls or if the dlsym call inside623 // our calloc/malloc function causes a free call. Read the known issue comment block a few lines above.624 ENSURE (libc_free != NULL);625 626 libc_free(ptr);627 #ifdef ALLOC_DEBUG628 printf("### free(%p)\n", ptr);629 #endif630 }631 632 static void alloc_hook_initialize()633 {634 }635 636 static long get_memory_alloc_count()637 {638 return alloc_count;639 }640 641 #else642 643 static void alloc_hook_initialize()644 {645 }646 static long get_memory_alloc_count()647 {648 // TODO: don't show this column of data when we don't have sensible values649 // to display.650 return 0;651 }652 #endif653 654 382 void CProfileNode::Call() 655 383 { … … 659 387 { 660 388 start = timer_Time(); 661 start_mallocs = get_memory_alloc_count();662 389 } 663 390 } … … 669 396 670 397 double now = timer_Time(); 671 long allocs = get_memory_alloc_count();672 398 time_frame_current += (now - start); 673 399 time_turn_current += (now - start); 674 mallocs_frame_current += (allocs - start_mallocs);675 mallocs_turn_current += (allocs - start_mallocs);676 400 return true; 677 401 } … … 715 439 void CProfileManager::Frame() 716 440 { 717 ONCE(alloc_hook_initialize());718 719 441 root->time_frame_current += (timer_Time() - root->start); 720 root->mallocs_frame_current += (get_memory_alloc_count() - root->start_mallocs);721 442 722 443 root->Frame(); … … 729 450 730 451 root->start = timer_Time(); 731 root->start_mallocs = get_memory_alloc_count();732 452 } 733 453 -
ps/trunk/source/ps/Profile.h
r25394 r26805 1 /* Copyright (C) 202 1Wildfire Games.1 /* Copyright (C) 202 Wildfire Games. 2 2 * This file is part of 0 A.D. 3 3 * … … 63 63 double GetTurnCalls() const; 64 64 double GetTurnTime() const; 65 double GetFrameMallocs() const;66 double GetTurnMallocs() const;67 65 68 66 const CProfileNode* GetChild( const char* name ) const; … … 104 102 RingBuf<double, PROFILE_AMORTIZE_TURNS> time_per_turn; 105 103 106 long mallocs_frame_current;107 long mallocs_turn_current;108 RingBuf<long, PROFILE_AMORTIZE_FRAMES> mallocs_per_frame;109 RingBuf<long, PROFILE_AMORTIZE_TURNS> mallocs_per_turn;110 111 104 double start; 112 long start_mallocs;113 105 int recursion; 114 106
Note:
See TracChangeset
for help on using the changeset viewer.