Windows
Some of the above values are easily available from the appropriate Win32 API, I just list them here for completeness. Others, however, need to be obtained from the Performance Data Helper library (PDH), which is a bit "unintuitive" and takes a lot of painful trial and error to get to work. (At least it took me quite a while, perhaps I've been only a bit stupid...)
Note: for clarity all error checking has been omitted from the following code. Do check the return codes...!
Total Virtual Memory:
#include "windows.h"
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
Note: The name "TotalPageFile" is a bit misleading here. In reality this parameter gives the "Virtual Memory Size", which is size of swap file plus installed RAM.
Virtual Memory currently used:
Same code as in "Total Virtual Memory" and then
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
Virtual Memory currently used by current process:
#include "windows.h"
#include "psapi.h"
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
Total Physical Memory (RAM):
Same code as in "Total Virtual Memory" and then
DWORDLONG totalPhysMem = memInfo.ullTotalPhys;
Physical Memory currently used:
Same code as in "Total Virtual Memory" and then
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
Physical Memory currently used by current process:
Same code as in "Virtual Memory currently used by current process" and then
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
CPU currently used:
#include "TCHAR.h"
#include "pdh.h"
static PDH_HQUERY cpuQuery;
static PDH_HCOUNTER cpuTotal;
void init(){
PdhOpenQuery(NULL, NULL, &cpuQuery);
// You can also use L"\\Processor(*)\\% Processor Time" and get individual CPU values with PdhGetFormattedCounterArray()
PdhAddEnglishCounter(cpuQuery, L"\\Processor(_Total)\\% Processor Time", NULL, &cpuTotal);
PdhCollectQueryData(cpuQuery);
}
double getCurrentValue(){
PDH_FMT_COUNTERVALUE counterVal;
PdhCollectQueryData(cpuQuery);
PdhGetFormattedCounterValue(cpuTotal, PDH_FMT_DOUBLE, NULL, &counterVal);
return counterVal.doubleValue;
}
CPU currently used by current process:
#include "windows.h"
static ULARGE_INTEGER lastCPU, lastSysCPU, lastUserCPU;
static int numProcessors;
static HANDLE self;
void init(){
SYSTEM_INFO sysInfo;
FILETIME ftime, fsys, fuser;
GetSystemInfo(&sysInfo);
numProcessors = sysInfo.dwNumberOfProcessors;
GetSystemTimeAsFileTime(&ftime);
memcpy(&lastCPU, &ftime, sizeof(FILETIME));
self = GetCurrentProcess();
GetProcessTimes(self, &ftime, &ftime, &fsys, &fuser);
memcpy(&lastSysCPU, &fsys, sizeof(FILETIME));
memcpy(&lastUserCPU, &fuser, sizeof(FILETIME));
}
double getCurrentValue(){
FILETIME ftime, fsys, fuser;
ULARGE_INTEGER now, sys, user;
double percent;
GetSystemTimeAsFileTime(&ftime);
memcpy(&now, &ftime, sizeof(FILETIME));
GetProcessTimes(self, &ftime, &ftime, &fsys, &fuser);
memcpy(&sys, &fsys, sizeof(FILETIME));
memcpy(&user, &fuser, sizeof(FILETIME));
percent = (sys.QuadPart - lastSysCPU.QuadPart) +
(user.QuadPart - lastUserCPU.QuadPart);
percent /= (now.QuadPart - lastCPU.QuadPart);
percent /= numProcessors;
lastCPU = now;
lastUserCPU = user;
lastSysCPU = sys;
return percent * 100;
}
Linux
On Linux the choice that seemed obvious at first was to use the POSIX APIs like getrusage()
etc. I spent some time trying to get this to work, but never got meaningful values. When I finally checked the kernel sources themselves, I found out that apparently these APIs are not yet completely implemented as of Linux kernel 2.6!?
In the end I got all values via a combination of reading the pseudo-filesystem /proc
and kernel calls.
Total Virtual Memory:
#include "sys/types.h"
#include "sys/sysinfo.h"
struct sysinfo memInfo;
sysinfo (&memInfo);
long long totalVirtualMem = memInfo.totalram;
//Add other values in next statement to avoid int overflow on right hand side...
totalVirtualMem += memInfo.totalswap;
totalVirtualMem *= memInfo.mem_unit;
Virtual Memory currently used:
Same code as in "Total Virtual Memory" and then
long long virtualMemUsed = memInfo.totalram - memInfo.freeram;
//Add other values in next statement to avoid int overflow on right hand side...
virtualMemUsed += memInfo.totalswap - memInfo.freeswap;
virtualMemUsed *= memInfo.mem_unit;
Virtual Memory currently used by current process:
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
int parseLine(char* line){
// This assumes that a digit will be found and the line ends in " Kb".
int i = strlen(line);
const char* p = line;
while (*p <'0' || *p > '9') p++;
line[i-3] = '\0';
i = atoi(p);
return i;
}
int getValue(){ //Note: this value is in KB!
FILE* file = fopen("/proc/self/status", "r");
int result = -1;
char line[128];
while (fgets(line, 128, file) != NULL){
if (strncmp(line, "VmSize:", 7) == 0){
result = parseLine(line);
break;
}
}
fclose(file);
return result;
}
Total Physical Memory (RAM):
Same code as in "Total Virtual Memory" and then
long long totalPhysMem = memInfo.totalram;
//Multiply in next statement to avoid int overflow on right hand side...
totalPhysMem *= memInfo.mem_unit;
Physical Memory currently used:
Same code as in "Total Virtual Memory" and then
long long physMemUsed = memInfo.totalram - memInfo.freeram;
//Multiply in next statement to avoid int overflow on right hand side...
physMemUsed *= memInfo.mem_unit;
Physical Memory currently used by current process:
Change getValue() in "Virtual Memory currently used by current process" as follows:
int getValue(){ //Note: this value is in KB!
FILE* file = fopen("/proc/self/status", "r");
int result = -1;
char line[128];
while (fgets(line, 128, file) != NULL){
if (strncmp(line, "VmRSS:", 6) == 0){
result = parseLine(line);
break;
}
}
fclose(file);
return result;
}
CPU currently used:
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
static unsigned long long lastTotalUser, lastTotalUserLow, lastTotalSys, lastTotalIdle;
void init(){
FILE* file = fopen("/proc/stat", "r");
fscanf(file, "cpu %llu %llu %llu %llu", &lastTotalUser, &lastTotalUserLow,
&lastTotalSys, &lastTotalIdle);
fclose(file);
}
double getCurrentValue(){
double percent;
FILE* file;
unsigned long long totalUser, totalUserLow, totalSys, totalIdle, total;
file = fopen("/proc/stat", "r");
fscanf(file, "cpu %llu %llu %llu %llu", &totalUser, &totalUserLow,
&totalSys, &totalIdle);
fclose(file);
if (totalUser < lastTotalUser || totalUserLow < lastTotalUserLow ||
totalSys < lastTotalSys || totalIdle < lastTotalIdle){
//Overflow detection. Just skip this value.
percent = -1.0;
}
else{
total = (totalUser - lastTotalUser) + (totalUserLow - lastTotalUserLow) +
(totalSys - lastTotalSys);
percent = total;
total += (totalIdle - lastTotalIdle);
percent /= total;
percent *= 100;
}
lastTotalUser = totalUser;
lastTotalUserLow = totalUserLow;
lastTotalSys = totalSys;
lastTotalIdle = totalIdle;
return percent;
}
CPU currently used by current process:
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#include "sys/times.h"
#include "sys/vtimes.h"
static clock_t lastCPU, lastSysCPU, lastUserCPU;
static int numProcessors;
void init(){
FILE* file;
struct tms timeSample;
char line[128];
lastCPU = times(&timeSample);
lastSysCPU = timeSample.tms_stime;
lastUserCPU = timeSample.tms_utime;
file = fopen("/proc/cpuinfo", "r");
numProcessors = 0;
while(fgets(line, 128, file) != NULL){
if (strncmp(line, "processor", 9) == 0) numProcessors++;
}
fclose(file);
}
double getCurrentValue(){
struct tms timeSample;
clock_t now;
double percent;
now = times(&timeSample);
if (now <= lastCPU || timeSample.tms_stime < lastSysCPU ||
timeSample.tms_utime < lastUserCPU){
//Overflow detection. Just skip this value.
percent = -1.0;
}
else{
percent = (timeSample.tms_stime - lastSysCPU) +
(timeSample.tms_utime - lastUserCPU);
percent /= (now - lastCPU);
percent /= numProcessors;
percent *= 100;
}
lastCPU = now;
lastSysCPU = timeSample.tms_stime;
lastUserCPU = timeSample.tms_utime;
return percent;
}
TODO: Other Platforms
I would assume, that some of the Linux code also works for the Unixes, except for the parts that read the /proc pseudo-filesystem. Perhaps on Unix these parts can be replaced by getrusage()
and similar functions?
On OSX the Activity Monitor gives you actually a very good guess.
Private memory is for sure memory that is only used by your application. E.g. stack memory and all memory dynamically reserved using malloc() and comparable functions/methods (alloc method for Objective-C) is private memory. If you fork, private memory will be shared with you child, but marked copy-on-write. That means as long as a page is not modified by either process (parent or child) it is shared between them. As soon as either process modifies any page, this page is copied before it is modified. Even while this memory is shared with fork children (and it can only be shared with fork children), it is still shown as "private" memory, because in the worst case, every page of it will get modified (sooner or later) and then it is again private to each process again.
Shared memory is either memory that is currently shared (the same pages are visible in the virtual process space of different processes) or that is likely to become shared in the future (e.g. read-only memory, since there is no reason for not sharing read-only memory). At least that's how I read the source code of some command line tools from Apple. So if you share memory between processes using mmap (or a comparable call that maps the same memory into multiple processes), this would be shared memory. However the executable code itself is also shared memory, since if another instance of your application is started there is no reason why it may not share the code already loaded in memory (executable code pages are read-only by default, unless you are running your app in a debugger). Thus shared memory is really memory used by your application, just like private one, but it might additionally be shared with another process (or it might not, but why would it not count towards your application if it was shared?)
Real memory is the amount of RAM currently "assigned" to your process, no matter if private or shared. This can be exactly the sum of private and shared, but usually it is not. Your process might have more memory assigned to it than it currently needs (this speeds up requests for more memory in the future), but that is no loss to the system. If another process needs memory and no free memory is available, before the system starts swapping, it will take that extra memory away from your process and assign it another process (which is a fast and painless operation); therefor your next malloc call might be somewhat slower. Real memory can also be smaller than private and physical memory; this is because if your process requests memory from the system, it will only receive "virtual memory". This virtual memory is not linked to any real memory pages as long as you don't use it (so malloc 10 MB of memory, use only one byte of it, your process will get only a single page, 4096 byte, of memory assigned - the rest is only assigned if you actually ever need it). Further memory that is swapped may not count towards real memory either (not sure about this), but it will count towards shared and private memory.
Virtual memory is the sum of all address blocks that are consider valid in your apps process space. These addresses might be linked to physical memory (that is again private or shared), or they might not, but in that case they will be linked to physical memory as soon as you use the address. Accessing memory addresses outside of the known addresses will cause a SIGBUS and your app will crash. When memory is swapped, the virtual address space for this memory remains valid and accessing those addresses causes memory to be swapped back in.
Conclusion:
If your app does not explicitly or implicitly use shared memory, private memory is the amount of memory your app needs because of the stack size (or sizes if multithreaded) and because of the malloc() calls you made for dynamic memory. You don't have to care a lot for shared or real memory in that case.
If your app uses shared memory, and this includes a graphical UI, where memory is shared between your application and the WindowServer for example, then you might have a look at shared memory as well. A very high shared memory number may mean you have too many graphical resources loaded in memory at the moment.
Real memory is of little interest for app development. If it is bigger than the sum of shared and private, then this means nothing other than that the system is lazy at taken memory away from your process. If it is smaller, then your process has requested more memory than it actually needed, which is not bad either, since as long as you don't use all of the requested memory, you are not "stealing" memory from the system. If it is much smaller than the sum of shared and private, you may only consider to request less memory where possible, as you are a bit over-requesting memory (again, this is not bad, but it tells me that your code is not optimized for minimal memory usage and if it is cross platform, other platforms may not have such a sophisticated memory handling, so you may prefer to alloc many small blocks instead of a few big ones for example, or free memory a lot sooner, and so on).
If you are still not happy with all that information, you can get even more information. Open a terminal and run:
sudo vmmap <pid>
where is the process ID of your process. This will show you statistics for EVERY block of memory in your process space with start and end address. It will also tell you where this memory came from (A mapped file? Stack memory? Malloc'ed memory? A __DATA or __TEXT section of your executable?), how big it is in KB, the access rights and whether it is private, shared or copy-on-write. If it is mapped from a file, it will even give you the path to the file.
If you want only "actual" RAM usage, use
sudo vmmap -resident <pid>
Now it will show for every memory block how big the memory block is virtually and how much of it is really currently present in physical memory.
At the end of each dump is also an overview table with the sums of different memory types. This table looks like this for Firefox right now on my system:
REGION TYPE [ VIRTUAL/RESIDENT]
=========== [ =======/========]
ATS (font support) [ 33.8M/ 2496K]
CG backing stores [ 5588K/ 5460K]
CG image [ 20K/ 20K]
CG raster data [ 576K/ 576K]
CG shared images [ 2572K/ 2404K]
Carbon [ 1516K/ 1516K]
CoreGraphics [ 8K/ 8K]
IOKit [ 256.0M/ 0K]
MALLOC [ 256.9M/ 247.2M]
Memory tag=240 [ 4K/ 4K]
Memory tag=242 [ 12K/ 12K]
Memory tag=243 [ 8K/ 8K]
Memory tag=249 [ 156K/ 76K]
STACK GUARD [ 101.2M/ 9908K]
Stack [ 14.0M/ 248K]
VM_ALLOCATE [ 25.9M/ 25.6M]
__DATA [ 6752K/ 3808K]
__DATA/__OBJC [ 28K/ 28K]
__IMAGE [ 1240K/ 112K]
__IMPORT [ 104K/ 104K]
__LINKEDIT [ 30.7M/ 3184K]
__OBJC [ 1388K/ 1336K]
__OBJC/__DATA [ 72K/ 72K]
__PAGEZERO [ 4K/ 0K]
__TEXT [ 108.6M/ 63.5M]
__UNICODE [ 536K/ 512K]
mapped file [ 118.8M/ 50.8M]
shared memory [ 300K/ 276K]
shared pmap [ 6396K/ 3120K]
What does this tell us? E.g. the Firefox binary and all library it loads have 108 MB data together in their __TEXT sections, but currently only 63 MB of those are currently resident in memory. The font support (ATS) needs 33 MB, but only about 2.5 MB are really in memory. It uses a bit over 5 MB CG backing stores, CG = Core Graphics, those are most likely window contents, buttons, images and other data that is cached for fast drawing. It has requested 256 MB via malloc calls and currently 247 MB are really in mapped to memory pages. It has 14 MB space reserved for stacks, but only 248 KB stack space is really in use right now.
vmmap also has a good summary above the table
ReadOnly portion of Libraries: Total=139.3M resident=66.6M(48%) swapped_out_or_unallocated=72.7M(52%)
Writable regions: Total=595.4M written=201.8M(34%) resident=283.1M(48%) swapped_out=0K(0%) unallocated=312.3M(52%)
And this shows an interesting aspect of the OS X: For read only memory that comes from libraries, it plays no role if it is swapped out or simply unallocated; there is only resident and not resident. For writable memory this makes a difference (in my case 52% of all requested memory has never been used and is such unallocated, 0% of memory has been swapped out to disk).
The reason for that is simple: Read-only memory from mapped files is not swapped. If the memory is needed by the system, the current pages are simply dropped from the process, as the memory is already "swapped". It consisted only of content mapped directly from files and this content can be remapped whenever needed, as the files are still there. That way this memory won't waste space in the swap file either. Only writable memory must first be swapped to file before it is dropped, as its content wasn't stored on disk before.
Best Answer
With
ps
or similar tools you will only get the amount of memory pages allocated by that process. This number is correct, but:does not reflect the actual amount of memory used by the application, only the amount of memory reserved for it
can be misleading if pages are shared, for example by several threads or by using dynamically linked libraries
If you really want to know what amount of memory your application actually uses, you need to run it within a profiler. For example, Valgrind can give you insights about the amount of memory used, and, more importantly, about possible memory leaks in your program. The heap profiler tool of Valgrind is called 'massif':
Massif is a heap profiler. It performs detailed heap profiling by taking regular snapshots of a program's heap. It produces a graph showing heap usage over time, including information about which parts of the program are responsible for the most memory allocations. The graph is supplemented by a text or HTML file that includes more information for determining where the most memory is being allocated. Massif runs programs about 20x slower than normal.
As explained in the Valgrind documentation, you need to run the program through Valgrind:
Massif writes a dump of memory usage snapshots (e.g.
massif.out.12345
). These provide, (1) a timeline of memory usage, (2) for each snapshot, a record of where in your program memory was allocated. A great graphical tool for analyzing these files is massif-visualizer. But I foundms_print
, a simple text-based tool shipped with Valgrind, to be of great help already.To find memory leaks, use the (default)
memcheck
tool of valgrind.