Changeset 26805

Timestamp:
Apr 18, 2022, 11:37:08 PM (2 years ago)
Author:
Vladislav Belov
Message:

Removes the Profiler allocation counter. Fixes #6476

Refs rP4805, rP5180, rP13910.

Patch By: smiley

Differential Revision: https://code.wildfiregames.com/D4606

Location:
ps/trunk/source/ps
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • ps/trunk/source/ps/Profile.cpp

    r24530 r26805  
    1 /* Copyright (C) 2021 Wildfire Games.
     1/* Copyright (C) 202 Wildfire Games.
    22 * This file is part of 0 A.D.
    33 *
     
    2828#include "lib/timer.h"
    2929
    30 #if OS_WIN && !defined(NDEBUG)
    31 # define USE_CRT_SET_ALLOC_HOOK
    32 #endif
    33 
    34 #if defined(__GLIBC__) && !defined(NDEBUG)
    35 //# define USE_GLIBC_MALLOC_HOOK
    36 # define USE_GLIBC_MALLOC_OVERRIDE
    37 # include <dlfcn.h>
    38 # include <malloc.h>
    39 # include "lib/sysdep/cpu.h"
    40 #endif
    41 
    4230#include <numeric>
    4331
     
    8270            columns.push_back(ProfileColumn("calls/frame", 80));
    8371            columns.push_back(ProfileColumn("msec/frame", 80));
    84             columns.push_back(ProfileColumn("mallocs/frame", 120));
    8572            columns.push_back(ProfileColumn("calls/turn", 80));
    8673            columns.push_back(ProfileColumn("msec/turn", 80));
    87             columns.push_back(ProfileColumn("mallocs/turn", 80));
    8874        }
    8975    };
     
    161147        double unlogged_time_frame = node->GetFrameTime();
    162148        double unlogged_time_turn = node->GetTurnTime();
    163         double unlogged_mallocs_frame = node->GetFrameMallocs();
    164         double unlogged_mallocs_turn = node->GetTurnMallocs();
    165149        CProfileNode::const_profile_iterator it;
    166150
     
    169153            unlogged_time_frame -= (*it)->GetFrameTime();
    170154            unlogged_time_turn -= (*it)->GetTurnTime();
    171             unlogged_mallocs_frame -= (*it)->GetFrameMallocs();
    172             unlogged_mallocs_turn -= (*it)->GetTurnMallocs();
    173155        }
    174156        for (it = node->GetScriptChildren()->begin(); it != node->GetScriptChildren()->end(); ++it)
     
    176158            unlogged_time_frame -= (*it)->GetFrameTime();
    177159            unlogged_time_turn -= (*it)->GetTurnTime();
    178             unlogged_mallocs_frame -= (*it)->GetFrameMallocs();
    179             unlogged_mallocs_turn -= (*it)->GetTurnMallocs();
    180160        }
    181161
     
    185165        {
    186166            unlogged_time_turn = 0.0;
    187             unlogged_mallocs_turn = 0.0;
    188167        }
    189168
    190169        if (col == 2)
    191170            sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", unlogged_time_frame * 1000.0f);
    192         else if (col == 3)
    193             sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", unlogged_mallocs_frame);
    194         else if (col == 5)
     171        else if (col == 4)
    195172            sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", unlogged_time_turn * 1000.f);
    196         else if (col == 6)
    197             sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", unlogged_mallocs_turn);
    198173
    199174        return CStr(buf);
     
    213188        break;
    214189    case 3:
    215         sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->GetFrameMallocs());
     190        sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->Gets());
    216191        break;
    217192    case 4:
    218         sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->GetTurnCalls());
    219         break;
    220     case 5:
    221193        sprintf_s(buf, ARRAY_SIZE(buf), "%.3f", child->GetTurnTime() * 1000.0f);
    222         break;
    223     case 6:
    224         sprintf_s(buf, ARRAY_SIZE(buf), "%.1f", child->GetTurnMallocs());
    225194        break;
    226195    }
     
    313282}
    314283
    315 double CProfileNode::GetFrameMallocs() const
    316 {
    317     return average(mallocs_per_frame);
    318 }
    319 
    320 double CProfileNode::GetTurnMallocs() const
    321 {
    322     return average(mallocs_per_turn);
    323 }
    324 
    325284const CProfileNode* CProfileNode::GetChild( const char* childName ) const
    326285{
     
    384343    time_turn_current = 0.0;
    385344
    386     mallocs_per_frame.clear();
    387     mallocs_per_turn.clear();
    388     mallocs_frame_current = 0;
    389     mallocs_turn_current = 0;
    390 
    391345    profile_iterator it;
    392346    for (it = children.begin(); it != children.end(); ++it)
     
    400354    calls_per_frame.push_back(calls_frame_current);
    401355    time_per_frame.push_back(time_frame_current);
    402     mallocs_per_frame.push_back(mallocs_frame_current);
    403356
    404357    calls_frame_current = 0;
    405358    time_frame_current = 0.0;
    406     mallocs_frame_current = 0;
    407359
    408360    profile_iterator it;
     
    417369    calls_per_turn.push_back(calls_turn_current);
    418370    time_per_turn.push_back(time_turn_current);
    419     mallocs_per_turn.push_back(mallocs_turn_current);
    420371
    421372    calls_turn_current = 0;
    422373    time_turn_current = 0.0;
    423     mallocs_turn_current = 0;
    424374
    425375    profile_iterator it;
     
    430380}
    431381
    432 // TODO: these should probably only count allocations that occur in the thread being profiled
    433 #if defined(USE_CRT_SET_ALLOC_HOOK)
    434 
    435 static long malloc_count = 0;
    436 static _CRT_ALLOC_HOOK prev_hook;
    437 
    438 static int crt_alloc_hook(int allocType, void* userData, size_t size, int blockType,
    439     long requestNumber, const unsigned char* filename, int lineNumber)
    440 {
    441     if (allocType == _HOOK_ALLOC && Threading::IsMainThread())
    442         ++malloc_count;
    443 
    444     if (prev_hook)
    445         return prev_hook(allocType, userData, size, blockType, requestNumber, filename, lineNumber);
    446     else
    447         return 1;
    448 }
    449 
    450 static void alloc_hook_initialize()
    451 {
    452     prev_hook = _CrtSetAllocHook(crt_alloc_hook);
    453 }
    454 
    455 static long get_memory_alloc_count()
    456 {
    457     return malloc_count;
    458 }
    459 
    460 #elif defined(USE_GLIBC_MALLOC_HOOK)
    461 
    462 // Set up malloc hooks to count allocations - see
    463 // http://www.gnu.org/software/libc/manual/html_node/Hooks-for-Malloc.html
    464 static intptr_t malloc_count = 0;
    465 static void *(*old_malloc_hook) (size_t, const void*);
    466 static std::mutex alloc_hook_mutex;
    467 static void *malloc_hook(size_t size, const void* UNUSED(caller))
    468 {
    469     // This doesn't really work across threads. The hooks are global variables, and
    470     // we have to temporarily unhook in order to call the real malloc, and during that
    471     // time period another thread may perform an unhooked (hence uncounted) allocation
    472     // which we will miss.
    473 
    474     // Two threads may execute the hook simultaneously, so we need to do the
    475     // temporary unhooking in a thread-safe way, so for simplicity we just use a mutex.
    476     std::lock_guard<std::mutex> lock(alloc_hook_mutex);
    477     ++malloc_count;
    478     __malloc_hook = old_malloc_hook;
    479     void* result = malloc(size);
    480     old_malloc_hook = __malloc_hook;
    481     __malloc_hook = malloc_hook;
    482     return result;
    483 }
    484 
    485 static void alloc_hook_initialize()
    486 {
    487     std::lock_guard<std::mutex> lock(alloc_hook_mutex);
    488     old_malloc_hook = __malloc_hook;
    489     __malloc_hook = malloc_hook;
    490     // (we don't want to bother hooking realloc and memalign, because if they allocate
    491     // new memory then they'll be caught by the malloc hook anyway)
    492 }
    493 /*
    494 It would be nice to do:
    495     __attribute__ ((visibility ("default"))) void (*__malloc_initialize_hook)() = malloc_initialize_hook;
    496 except that doesn't seem to work in practice, since something (?) resets the
    497 hook to NULL some time while loading the game, after we've set it here - so
    498 we just call malloc_initialize_hook once inside CProfileManager::Frame instead
    499 and hope nobody deletes our hook after that.
    500 */
    501 static long get_memory_alloc_count()
    502 {
    503     return malloc_count;
    504 }
    505 
    506 #elif defined(USE_GLIBC_MALLOC_OVERRIDE)
    507 
    508 static intptr_t alloc_count = 0;
    509 
    510 // We override the malloc/realloc/calloc/free functions and then use dlsym to
    511 // defer the actual allocation to the real libc implementation.
    512 // The dlsym call will (in glibc 2.9/2.10) call calloc once (to allocate an error
    513 // message structure), so we have a bootstrapping problem when trying to
    514 // get the first called function via dlsym. So we kludge it by returning a statically-allocated
    515 // buffer for the very first call to calloc after we've called dlsym.
    516 // This is quite hacky but it seems to just about work in practice...
    517 
    518 // TODO: KNOWN ISSUE: Use after free and infinite recursion
    519 // We assign the glibc free function to libc_free in our malloc/calloc function (with dlsym).
    520 // We did that in the free function before, but had to change it to work around the first problem described below.
    521 // It's not a good solution because some of the problems described here can reappear when the first
    522 // call to malloc/calloc changes and enters the function with a different state.
    523 //
    524 // Dl* functions (dlsym, dlopen etc.) store an error message internally if something fails.
    525 // Calling dlerror returns a pointer to this error message. Calling dlerror a second time or calling dlsym
    526 // causes it to free the internal storage for this error message.
    527 // This behaviour can cause two types of problems:
    528 //
    529 // 1. Infinite recursion due to free call
    530 // Problem occurs if: We use any of the dl* functions in our free function and free gets called with an internal
    531 // error message buffer allocated.
    532 // What happens: Our call to the dl* function causes another free-call insdie glibc which calls our free function
    533 // and can cause infinite recursion.
    534 //
    535 // 2. Use after free
    536 // Problem occurs if: An external library (or any other function) calls a dl* function that stores an internal
    537 // error string, then calls dlerror to receive the message and then calls any of our malloc/calloc/realloc/free fuctions.
    538 // Our function uses one of the dl* functions too. After calling our function, it tries to use the error message pointer
    539 // it got with dlerror before.
    540 // What happens: Our call to the dl* function will free the storage of the message and the pointer in the external library
    541 // becomes invalid. We get undefined behaviour if the extern library uses the error message pointer after that.
    542 
    543 
    544 static bool alloc_bootstrapped = false;
    545 static char alloc_bootstrap_buffer[32]; // sufficient for x86_64
    546 static bool alloc_has_called_dlsym = false;
    547 static void (*libc_free)(void*) = NULL;
    548 // (We'll only be running a single thread at this point so no need for locking these variables)
    549 
    550 //#define ALLOC_DEBUG
    551 
    552 void* malloc(size_t sz)
    553 {
    554     cpu_AtomicAdd(&alloc_count, 1);
    555 
    556     static void *(*libc_malloc)(size_t);
    557     if (libc_malloc == NULL)
    558     {
    559         alloc_has_called_dlsym = true;
    560         libc_malloc = (void *(*)(size_t)) dlsym(RTLD_NEXT, "malloc");
    561     }
    562     void* ret = libc_malloc(sz);
    563 #ifdef ALLOC_DEBUG
    564     printf("### malloc(%d) = %p\n", sz, ret);
    565 #endif
    566 
    567     if (libc_free == NULL)
    568         libc_free = (void (*)(void*)) dlsym(RTLD_NEXT, "free");
    569 
    570     return ret;
    571 }
    572 
    573 void* realloc(void* ptr, size_t sz)
    574 {
    575     cpu_AtomicAdd(&alloc_count, 1);
    576 
    577     static void *(*libc_realloc)(void*, size_t);
    578     if (libc_realloc == NULL)
    579     {
    580         alloc_has_called_dlsym = true;
    581         libc_realloc = (void *(*)(void*, size_t)) dlsym(RTLD_NEXT, "realloc");
    582     }
    583     void* ret = libc_realloc(ptr, sz);
    584 #ifdef ALLOC_DEBUG
    585     printf("### realloc(%p, %d) = %p\n", ptr, sz, ret);
    586 #endif
    587     return ret;
    588 }
    589 
    590 void* calloc(size_t nm, size_t sz)
    591 {
    592     cpu_AtomicAdd(&alloc_count, 1);
    593 
    594     static void *(*libc_calloc)(size_t, size_t);
    595     if (libc_calloc == NULL)
    596     {
    597         if (alloc_has_called_dlsym && !alloc_bootstrapped)
    598         {
    599             ENSURE(nm*sz <= ARRAY_SIZE(alloc_bootstrap_buffer));
    600 #ifdef ALLOC_DEBUG
    601             printf("### calloc-bs(%d, %d) = %p\n", nm, sz, alloc_bootstrap_buffer);
    602 #endif
    603             alloc_bootstrapped = true;
    604             return alloc_bootstrap_buffer;
    605         }
    606         alloc_has_called_dlsym = true;
    607         libc_calloc = (void *(*)(size_t, size_t)) dlsym(RTLD_NEXT, "calloc");
    608     }
    609     void* ret = libc_calloc(nm, sz);
    610 #ifdef ALLOC_DEBUG
    611     printf("### calloc(%d, %d) = %p\n", nm, sz, ret);
    612 #endif
    613 
    614     if (libc_free == NULL)
    615         libc_free = (void (*)(void*)) dlsym(RTLD_NEXT, "free");
    616 
    617     return ret;
    618 }
    619 
    620 void free(void* ptr)
    621 {
    622     // Might be triggered if free is called before any calloc/malloc calls or if the dlsym call inside
    623     // our calloc/malloc function causes a free call. Read the known issue comment block a few lines above.
    624     ENSURE (libc_free != NULL);
    625 
    626     libc_free(ptr);
    627 #ifdef ALLOC_DEBUG
    628     printf("### free(%p)\n", ptr);
    629 #endif
    630 }
    631 
    632 static void alloc_hook_initialize()
    633 {
    634 }
    635 
    636 static long get_memory_alloc_count()
    637 {
    638     return alloc_count;
    639 }
    640 
    641 #else
    642 
    643 static void alloc_hook_initialize()
    644 {
    645 }
    646 static long get_memory_alloc_count()
    647 {
    648     // TODO: don't show this column of data when we don't have sensible values
    649     // to display.
    650     return 0;
    651 }
    652 #endif
    653 
    654382void CProfileNode::Call()
    655383{
     
    659387    {
    660388        start = timer_Time();
    661         start_mallocs = get_memory_alloc_count();
    662389    }
    663390}
     
    669396
    670397    double now = timer_Time();
    671     long allocs = get_memory_alloc_count();
    672398    time_frame_current += (now - start);
    673399    time_turn_current += (now - start);
    674     mallocs_frame_current += (allocs - start_mallocs);
    675     mallocs_turn_current += (allocs - start_mallocs);
    676400    return true;
    677401}
     
    715439void CProfileManager::Frame()
    716440{
    717     ONCE(alloc_hook_initialize());
    718 
    719441    root->time_frame_current += (timer_Time() - root->start);
    720     root->mallocs_frame_current += (get_memory_alloc_count() - root->start_mallocs);
    721442
    722443    root->Frame();
     
    729450
    730451    root->start = timer_Time();
    731     root->start_mallocs = get_memory_alloc_count();
    732452}
    733453
  • ps/trunk/source/ps/Profile.h

    r25394 r26805  
    1 /* Copyright (C) 2021 Wildfire Games.
     1/* Copyright (C) 202 Wildfire Games.
    22 * This file is part of 0 A.D.
    33 *
     
    6363    double GetTurnCalls() const;
    6464    double GetTurnTime() const;
    65     double GetFrameMallocs() const;
    66     double GetTurnMallocs() const;
    6765
    6866    const CProfileNode* GetChild( const char* name ) const;
     
    104102    RingBuf<double, PROFILE_AMORTIZE_TURNS> time_per_turn;
    105103
    106     long mallocs_frame_current;
    107     long mallocs_turn_current;
    108     RingBuf<long, PROFILE_AMORTIZE_FRAMES> mallocs_per_frame;
    109     RingBuf<long, PROFILE_AMORTIZE_TURNS> mallocs_per_turn;
    110 
    111104    double start;
    112     long start_mallocs;
    113105    int recursion;
    114106
Note: See TracChangeset for help on using the changeset viewer.