C++ – Calculating %CPU Usage Using Proc Files

clinux

I would like to dedicate a thread in my program to gathering metrics on its performance. Memory usage, CPU etc. I've been trying to do this using the /proc/stat and /proc/pid/stat files. I'm currently stuck at trying to measure the %CPU usage. The values reported by my program are totally out of line with what 'top' is reporting. I'm tried this on a few different linux distros and am seeing the same results on each.

Here is the code I am using to calculate the percentage. Can anyone spot any issues here?

https://github.com/mmcilroy/cpu_usage

#include <stdlib.h>
#include <sys/types.h>
#include <sys/times.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct pstat {
    long unsigned int utime_ticks;
    long int cutime_ticks;
    long unsigned int stime_ticks;
    long int cstime_ticks;
    long unsigned int vsize; // virtual memory size in bytes
    long unsigned int rss; //Resident  Set  Size in bytes
    long unsigned int cpu_total_time;
};

int get_usage(const pid_t pid, struct pstat* result) {

    //convert  pid to string
    char pid_s[20];
    snprintf(pid_s, sizeof(pid_s), "%d", pid);

    char stat_filepath[30] = "/proc/"; strncat(stat_filepath, pid_s,
            sizeof(stat_filepath) - strlen(stat_filepath) -1);
    strncat(stat_filepath, "/stat", sizeof(stat_filepath) -
            strlen(stat_filepath) -1);

    FILE *fpstat = fopen(stat_filepath, "r");
    if (fpstat == NULL) {
        perror("FOPEN ERROR ");
        return -1;
    }

    FILE *fstat = fopen("/proc/stat", "r");
    if (fstat == NULL) {
        perror("FOPEN ERROR ");
        fclose(fstat);
        return -1;
    }

    //read values from /proc/pid/stat
    bzero(result, sizeof(struct pstat));
    long int rss;
    if (fscanf(fpstat, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu"
                "%lu %ld %ld %*d %*d %*d %*d %*u %lu %ld",
                &result->utime_ticks, &result->stime_ticks,
                &result->cutime_ticks, &result->cstime_ticks, &result->vsize,
                &rss) == EOF) {
        fclose(fpstat);
        return -1;
    }
    fclose(fpstat);
    result->rss = rss * getpagesize();

    //read+calc cpu total time from /proc/stat
    long unsigned int cpu_time[10];
    bzero(cpu_time, sizeof(cpu_time));
    if (fscanf(fstat, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                &cpu_time[0], &cpu_time[1], &cpu_time[2], &cpu_time[3],
                &cpu_time[4], &cpu_time[5], &cpu_time[6], &cpu_time[7],
                &cpu_time[8], &cpu_time[9]) == EOF) {
        fclose(fstat);
        return -1;
    }

    fclose(fstat);

    for(int i=0; i < 4;i++)
        result->cpu_total_time += cpu_time[i];

    printf( "usage: cpu %lu, utime %lu, stime %lu\n", result->cpu_total_time, result->utime_ticks, result->stime_ticks );

    return 0;
}

void calc_cpu_usage_pct(const struct pstat* cur_usage,
                        const struct pstat* last_usage,
                        double* usage)
{
    printf( "delta: cpu %lu, utime %lu, stime %lu\n",
        cur_usage->cpu_total_time - last_usage->cpu_total_time,
        cur_usage->utime_ticks - last_usage->utime_ticks,
        cur_usage->stime_ticks - last_usage->stime_ticks );

    const long unsigned int cpu_diff = cur_usage->cpu_total_time - last_usage->cpu_total_time;
    const long unsigned int pid_diff =
        ( cur_usage->utime_ticks + cur_usage->utime_ticks + cur_usage->stime_ticks - cur_usage->stime_ticks ) -
        ( last_usage->utime_ticks + last_usage->utime_ticks + last_usage->stime_ticks - last_usage->stime_ticks );

    *usage = 100.0 * ( (double)pid_diff / (double)cpu_diff );
}

int main( int argc, char* argv[] )
{
    pstat prev, curr;
    double pct;

    struct tms t;
    times( &t );

    if( argc <= 1 ) {
        printf( "please supply a pid\n" ); return 1;
    }

    while( 1 )
    {
        if( get_usage(atoi(argv[1]), &prev) == -1 ) {
            printf( "error\n" );
        }

        sleep( 5 );

        if( get_usage(atoi(argv[1]), &curr) == -1 ) {
            printf( "error\n" );
        }

        calc_cpu_usage_pct(&curr, &prev, &pct);

        printf("%%cpu: %.02f\n", pct);
    }
}

If you want to try it out for yourself, the program expect 1 arguments – the pid of a process to monitor

Best Answer

I know this is a bit old but I can explain why your new equation works: (1/INTERVAL) * (pid diff)

It's just a simplification of the basic percentage equation 100 * (pid diff) / (cpu diff), which looks like what you were trying to do in your first example.

The cpu time in /proc/stat (and the utime and stime in /proc/pid/stat) is reported in USER_HZ (or jiffies). This value is usually 1/100 of a second. This means that there will be 100 "tics" in each second for the CPU, which means your "CPU diff" will be INTERVAL*100.

Substitute that in and you get:

100 * (pid diff) / (INTERVAL * 100)

Cancel out the 100's and you are left with:

(pid diff) / INTERVAL

Which is the same as what you are now using. This also means that if you did indeed correct the problems you have in the code at the top, then that should work as well. The pid diff should be (curr utime + curr stime) - (prev utime + prev stime). If it doesn't work, then perhaps the way you are adding up the CPU time is wrong? It'd be easy to test because you know what value it should be (INTERVAL*100).

Since you now have a working equation, you may not care to figure out the problem with the original code but keep in mind that if you ever try to use it on a system where USER_HZ is not 1/100, the equation will be invalid.