Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

birq / birq.c @ 1ebf1cde

History | View | Annotate | Download (14 KB)

1
/*
2
 * birq
3
 *
4
 * Balance IRQ
5
 *
6
 */
7

    
8
#ifdef HAVE_CONFIG_H
9
#include "config.h"
10
#endif /* HAVE_CONFIG_H */
11

    
12
#include <stdio.h>
13
#include <stdlib.h>
14
#include <unistd.h>
15
#include <sys/types.h>
16
#include <errno.h>
17
#include <assert.h>
18
#include <string.h>
19
#include <signal.h>
20
#include <syslog.h>
21
#include <fcntl.h>
22
#include <time.h>
23
#ifdef HAVE_GETOPT_H
24
#include <getopt.h>
25
#endif
26

    
27
#include "birq.h"
28
#include "lub/log.h"
29
#include "lub/list.h"
30
#include "lub/ini.h"
31
#include "irq.h"
32
#include "numa.h"
33
#include "cpu.h"
34
#include "statistics.h"
35
#include "balance.h"
36
#include "pxm.h"
37

    
38
#ifndef VERSION
39
#define VERSION "1.2.0"
40
#endif
41

    
42
/*
 * Flags raised by the signal handlers and polled by the main loop.
 * They are declared as volatile sig_atomic_t because that is the only
 * object type the C standard allows an asynchronous signal handler to
 * write to.
 */
static volatile sig_atomic_t sigterm = 0; /* Exit if 1 */
static void sighandler(int signo);

static volatile sig_atomic_t sighup = 0; /* Re-read config file */
static void sighup_handler(int signo);

/* Forward declarations of the option handling helpers defined below */
static void help(int status, const char *argv0);
static struct options *opts_init(void);
static void opts_free(struct options *opts);
static int opts_parse(int argc, char *argv[], struct options *opts);
static int parse_config(const char *fname, struct options *opts);
54

    
55
/* Command line options */
56
struct options {
57
        char *pidfile;
58
        char *cfgfile;
59
        int cfgfile_userdefined;
60
        char *pxm; /* Proximity config file */
61
        int debug; /* Don't daemonize in debug mode */
62
        int log_facility;
63
        float threshold;
64
        float load_limit;
65
        int verbose;
66
        int ht;
67
        unsigned int long_interval;
68
        unsigned int short_interval;
69
        birq_choose_strategy_e strategy;
70
        cpumask_t exclude_cpus;
71
};
72

    
73
/*--------------------------------------------------------- */
74
int main(int argc, char **argv)
75
{
76
        int retval = -1;
77
        struct options *opts = NULL;
78
        int pidfd = -1;
79
        unsigned int interval;
80

    
81
        /* Signal vars */
82
        struct sigaction sig_act;
83
        sigset_t sig_set;
84

    
85
        /* IRQ list. It contain all found IRQs. */
86
        lub_list_t *irqs;
87
        /* IRQs need to be balanced */
88
        lub_list_t *balance_irqs;
89
        /* CPU list. It contain all found CPUs. */
90
        lub_list_t *cpus;
91
        /* NUMA list. It contain all found NUMA nodes. */
92
        lub_list_t *numas;
93
        /* Proximity list. */
94
        lub_list_t *pxms;
95

    
96
        /* Parse command line options */
97
        opts = opts_init();
98
        if (opts_parse(argc, argv, opts))
99
                goto err;
100

    
101
        /* Parse config file */
102
        if (!access(opts->cfgfile, R_OK)) {
103
                if (parse_config(opts->cfgfile, opts))
104
                        goto err;
105
        } else if (opts->cfgfile_userdefined) {
106
                fprintf(stderr, "Error: Can't find config file %s\n",
107
                        opts->cfgfile);
108
                goto err;
109
        }
110

    
111
        /* Validate threshold and load limit */
112
        if (opts->load_limit > opts->threshold) {
113
                fprintf(stderr, "Error: The load limit is greater than threshold.\n");
114
                goto err;
115
        }
116

    
117
        /* Initialize syslog */
118
        openlog(argv[0], LOG_CONS, opts->log_facility);
119
        syslog(LOG_ERR, "Start daemon.\n");
120

    
121
        /* Fork the daemon */
122
        if (!opts->debug) {
123
                /* Daemonize */
124
                if (daemon(0, 0) < 0) {
125
                        syslog(LOG_ERR, "Can't daemonize\n");
126
                        goto err;
127
                }
128

    
129
                /* Write pidfile */
130
                if ((pidfd = open(opts->pidfile,
131
                        O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
132
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
133
                        syslog(LOG_WARNING, "Can't open pidfile %s: %s\n",
134
                                opts->pidfile, strerror(errno));
135
                } else {
136
                        char str[20];
137
                        snprintf(str, sizeof(str), "%u\n", getpid());
138
                        str[sizeof(str) - 1] = '\0';
139
                        if (write(pidfd, str, strlen(str)) < 0)
140
                                syslog(LOG_WARNING, "Can't write to %s: %s\n",
141
                                        opts->pidfile, strerror(errno));
142
                        close(pidfd);
143
                }
144
        }
145

    
146
        /* Set signal handler */
147
        sigemptyset(&sig_set);
148
        sigaddset(&sig_set, SIGTERM);
149
        sigaddset(&sig_set, SIGINT);
150
        sigaddset(&sig_set, SIGQUIT);
151

    
152
        sig_act.sa_flags = 0;
153
        sig_act.sa_mask = sig_set;
154
        sig_act.sa_handler = &sighandler;
155
        sigaction(SIGTERM, &sig_act, NULL);
156
        sigaction(SIGINT, &sig_act, NULL);
157
        sigaction(SIGQUIT, &sig_act, NULL);
158

    
159
        /* SIGHUP handler */
160
        sigemptyset(&sig_set);
161
        sigaddset(&sig_set, SIGHUP);
162

    
163
        sig_act.sa_flags = 0;
164
        sig_act.sa_mask = sig_set;
165
        sig_act.sa_handler = &sighup_handler;
166
        sigaction(SIGHUP, &sig_act, NULL);
167

    
168
        /* Randomize */
169
        srand(time(NULL));
170

    
171
        /* Scan NUMA nodes */
172
        numas = lub_list_new(numa_list_compare);
173
        scan_numas(numas);
174
        if (opts->verbose)
175
                show_numas(numas);
176

    
177
        /* Scan CPUs */
178
        cpus = lub_list_new(cpu_list_compare);
179
        scan_cpus(cpus, opts->ht);
180
        if (opts->verbose)
181
                show_cpus(cpus);
182

    
183
        /* Prepare data structures */
184
        irqs = lub_list_new(irq_list_compare);
185
        balance_irqs = lub_list_new(irq_list_compare);
186

    
187
        /* Parse proximity file */
188
        pxms = lub_list_new(NULL);
189
        if (opts->pxm)
190
                parse_pxm_config(opts->pxm, pxms, numas);
191
        if (opts->verbose)
192
                show_pxms(pxms);
193

    
194
        /* Main loop */
195
        while (!sigterm) {
196
                lub_list_node_t *node;
197
                char outstr[10];
198
                time_t t;
199
                struct tm *tmp;
200

    
201
                t = time(NULL);
202
                tmp = localtime(&t);
203
                if (tmp) {
204
                        strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
205
                        printf("----[ %s ]----------------------------------------------------------------\n", outstr);
206
                }
207

    
208
                /* Re-read config file on SIGHUP */
209
                if (sighup) {
210
                        if (!access(opts->cfgfile, R_OK)) {
211
                                syslog(LOG_ERR, "Re-reading config file\n");
212
                                if (parse_config(opts->cfgfile, opts))
213
                                        syslog(LOG_ERR, "Error while config file parsing.\n");
214
                        } else if (opts->cfgfile_userdefined)
215
                                syslog(LOG_ERR, "Can't find config file.\n");
216
                        sighup = 0;
217
                }
218

    
219
                /* Rescan PCI devices for new IRQs. */
220
                scan_irqs(irqs, balance_irqs, pxms);
221
                if (opts->verbose)
222
                        irq_list_show(irqs);
223
                /* Link IRQs to CPUs due to real current smp affinity. */
224
                link_irqs_to_cpus(cpus, irqs);
225

    
226
                /* Gather statistics on CPU load and number of interrupts. */
227
                gather_statistics(cpus, irqs);
228
                show_statistics(cpus, opts->verbose);
229
                /* Choose IRQ to move to another CPU. */
230
                choose_irqs_to_move(cpus, balance_irqs,
231
                        opts->threshold, opts->strategy);
232

    
233
                /* Balance IRQs */
234
                if (lub_list_len(balance_irqs) != 0) {
235
                        /* Set short interval to make balancing faster. */
236
                        interval = opts->short_interval;
237
                        /* Choose new CPU for IRQs need to be balanced. */
238
                        balance(cpus, balance_irqs, opts->load_limit);
239
                        /* Write new values to /proc/irq/<IRQ>/smp_affinity */
240
                        apply_affinity(balance_irqs);
241
                        /* Free list of balanced IRQs */
242
                        while ((node = lub_list__get_tail(balance_irqs))) {
243
                                lub_list_del(balance_irqs, node);
244
                                lub_list_node_free(node);
245
                        }
246
                } else {
247
                        /* If nothing to balance */
248
                        interval = opts->long_interval;
249
                }
250
                
251
                /* Wait before next iteration */
252
                sleep(interval);
253
        }
254

    
255
        /* Free data structures */
256
        irq_list_free(irqs);
257
        lub_list_free(balance_irqs);
258
        cpu_list_free(cpus);
259
        numa_list_free(numas);
260
        pxm_list_free(pxms);
261

    
262
        retval = 0;
263
err:
264
        /* Remove pidfile */
265
        if (pidfd >= 0) {
266
                if (unlink(opts->pidfile) < 0) {
267
                        syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
268
                        opts->pidfile, strerror(errno));
269
                }
270
        }
271

    
272
        /* Free command line options */
273
        opts_free(opts);
274
        syslog(LOG_ERR, "Stop daemon.\n");
275

    
276
        return retval;
277
}
278

    
279
/*--------------------------------------------------------- */
280
/* Signal handler for temination signals (like SIGTERM, SIGINT, ...) */
281
static void sighandler(int signo)
282
{
283
        sigterm = 1;
284
        signo = signo; /* Happy compiler */
285
}
286

    
287
/*--------------------------------------------------------- */
288
/* Re-read config file on SIGHUP */
289
static void sighup_handler(int signo)
290
{
291
        sighup = 1;
292
        signo = signo; /* Happy compiler */
293
}
294

    
295
/*--------------------------------------------------------- */
296
/* Initialize option structure by defaults */
297
static struct options *opts_init(void)
298
{
299
        struct options *opts = NULL;
300

    
301
        opts = malloc(sizeof(*opts));
302
        assert(opts);
303
        opts->debug = 0; /* daemonize by default */
304
        opts->pidfile = strdup(BIRQ_PIDFILE);
305
        opts->cfgfile = strdup(BIRQ_CFGFILE);
306
        opts->cfgfile_userdefined = 0;
307
        opts->pxm = NULL;
308
        opts->log_facility = LOG_DAEMON;
309
        opts->threshold = BIRQ_DEFAULT_THRESHOLD;
310
        opts->load_limit = BIRQ_DEFAULT_LOAD_LIMIT;
311
        opts->verbose = 0;
312
        opts->ht = 0;
313
        opts->long_interval = BIRQ_LONG_INTERVAL;
314
        opts->short_interval = BIRQ_SHORT_INTERVAL;
315
        opts->strategy = BIRQ_CHOOSE_RND;
316
        cpus_init(opts->exclude_cpus);
317
        cpus_clear(opts->exclude_cpus);
318

    
319
        return opts;
320
}
321

    
322
/*--------------------------------------------------------- */
323
/* Free option structure */
324
static void opts_free(struct options *opts)
325
{
326
        if (opts->pidfile)
327
                free(opts->pidfile);
328
        if (opts->cfgfile)
329
                free(opts->cfgfile);
330
        if (opts->pxm)
331
                free(opts->pxm);
332
        cpus_free(opts->exclude_cpus);
333
        free(opts);
334
}
335

    
336
/* Parse 'strategy' option */
337
static int opt_parse_strategy(const char *optarg, birq_choose_strategy_e *strategy)
338
{
339
        assert(optarg);
340
        assert(strategy);
341

    
342
        if (!strcmp(optarg, "max"))
343
                *strategy = BIRQ_CHOOSE_MAX;
344
        else if (!strcmp(optarg, "min"))
345
                *strategy = BIRQ_CHOOSE_MIN;
346
        else if (!strcmp(optarg, "rnd"))
347
                *strategy = BIRQ_CHOOSE_RND;
348
        else {
349
                fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
350
                return -1;
351
        }
352
        return 0;
353
}
354

    
355
/*
 * Parse the value of the 'threshold' and 'load-limit' options.
 *
 * optarg    - text to parse, a percentage in the range [0, 100].
 *             Trailing whitespace is tolerated.
 * threshold - receives the parsed value; it is left untouched on error.
 *
 * Returns 0 on success, -1 (after printing a message to stderr) when the
 * text is not a number, is followed by garbage, or is out of range.
 */
static int opt_parse_threshold(const char *optarg, float *threshold)
{
        char *endptr;
        float thresh;

        assert(optarg);
        assert(threshold);

        thresh = strtof(optarg, &endptr);
        if (endptr == optarg) {
                fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
                return -1;
        }
        /* Nothing but whitespace may follow the number */
        endptr += strspn(endptr, " \t\n\v\f\r");
        if (*endptr != '\0') {
                fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
                return -1;
        }
        if (thresh > 100.00) {
                fprintf(stderr, "Error: The threshold/load-limit value %s > 100.\n", optarg);
                return -1;
        }
        /* The negated comparison also rejects NaN */
        if (!(thresh >= 0.00)) {
                fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
                return -1;
        }
        *threshold = thresh;
        return 0;
}
376

    
377
/*
 * Parse the value of the 'short-interval' and 'long-interval' options.
 *
 * optarg   - text to parse, a non-negative decimal number of seconds.
 *            Surrounding whitespace is tolerated.
 * interval - receives the parsed value; it is left untouched on error.
 *
 * Returns 0 on success, -1 (after printing a message to stderr) when the
 * text is not a number, is negative, is followed by garbage, or does not
 * fit into an unsigned int.
 */
static int opt_parse_interval(const char *optarg, unsigned int *interval)
{
        const char *start;
        char *endptr;
        unsigned long int val;

        assert(optarg);
        assert(interval);

        /* strtoul() accepts a leading '-' and silently negates the
         * result, so the sign has to be rejected explicitly. */
        start = optarg + strspn(optarg, " \t\n\v\f\r");
        if (*start == '-') {
                fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
                return -1;
        }

        errno = 0;
        val = strtoul(start, &endptr, 10);
        if (endptr == start || errno == ERANGE) {
                fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
                return -1;
        }
        /* Nothing but whitespace may follow the number */
        endptr += strspn(endptr, " \t\n\v\f\r");
        if (*endptr != '\0') {
                fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
                return -1;
        }
        /* The value must survive the conversion to unsigned int */
        if ((unsigned long int)(unsigned int)val != val) {
                fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
                return -1;
        }
        *interval = (unsigned int)val;
        return 0;
}
394

    
395
/*--------------------------------------------------------- */
396
/* Parse command line options */
397
static int opts_parse(int argc, char *argv[], struct options *opts)
398
{
399
        static const char *shortopts = "hp:c:dO:t:l:vri:I:s:x:";
400
#ifdef HAVE_GETOPT_H
401
        static const struct option longopts[] = {
402
                {"help",                0, NULL, 'h'},
403
                {"pid",                        1, NULL, 'p'},
404
                {"conf",                1, NULL, 'c'},
405
                {"debug",                0, NULL, 'd'},
406
                {"facility",                1, NULL, 'O'},
407
                {"threshold",                1, NULL, 't'},
408
                {"load-limit",                1, NULL, 't'},
409
                {"verbose",                0, NULL, 'v'},
410
                {"ht",                        0, NULL, 'r'},
411
                {"short-interval",        1, NULL, 'i'},
412
                {"long-interval",        1, NULL, 'I'},
413
                {"strategy",                1, NULL, 's'},
414
                {"pxm",                        1, NULL, 'x'},
415
                {NULL,                        0, NULL, 0}
416
        };
417
#endif
418
        optind = 1;
419
        while(1) {
420
                int opt;
421
#ifdef HAVE_GETOPT_H
422
                opt = getopt_long(argc, argv, shortopts, longopts, NULL);
423
#else
424
                opt = getopt(argc, argv, shortopts);
425
#endif
426
                if (-1 == opt)
427
                        break;
428
                switch (opt) {
429
                case 'p':
430
                        if (opts->pidfile)
431
                                free(opts->pidfile);
432
                        opts->pidfile = strdup(optarg);
433
                        break;
434
                case 'c':
435
                        if (opts->cfgfile)
436
                                free(opts->cfgfile);
437
                        opts->cfgfile = strdup(optarg);
438
                        opts->cfgfile_userdefined = 1;
439
                        break;
440
                case 'x':
441
                        if (opts->pxm)
442
                                free(opts->pxm);
443
                        opts->pxm = strdup(optarg);
444
                        break;
445
                case 'd':
446
                        opts->debug = 1;
447
                        break;
448
                case 'v':
449
                        opts->verbose = 1;
450
                        break;
451
                case 'r':
452
                        opts->ht = 1;
453
                        break;
454
                case 'O':
455
                        if (lub_log_facility(optarg, &(opts->log_facility))) {
456
                                fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
457
                                exit(-1);
458
                        }
459
                        break;
460
                case 't':
461
                        if (opt_parse_threshold(optarg, &opts->threshold))
462
                                exit(-1);
463
                        break;
464
                case 'l':
465
                        if (opt_parse_threshold(optarg, &opts->load_limit))
466
                                exit(-1);
467
                        break;
468
                case 'i':
469
                        if (opt_parse_interval(optarg, &opts->short_interval))
470
                                exit(-1);
471
                        break;
472
                case 'I':
473
                        if (opt_parse_interval(optarg, &opts->long_interval))
474
                                exit(-1);
475
                        break;
476
                case 's':
477
                        if (opt_parse_strategy(optarg, &opts->strategy) < 0)
478
                                exit(-1);
479
                        break;
480
                case 'h':
481
                        help(0, argv[0]);
482
                        exit(0);
483
                        break;
484
                default:
485
                        help(-1, argv[0]);
486
                        exit(-1);
487
                        break;
488
                }
489
        }
490

    
491

    
492
        return 0;
493
}
494

    
495
/*--------------------------------------------------------- */
496
/* Print help message */
497
static void help(int status, const char *argv0)
498
{
499
        const char *name = NULL;
500

    
501
        if (!argv0)
502
                return;
503

    
504
        /* Find the basename */
505
        name = strrchr(argv0, '/');
506
        if (name)
507
                name++;
508
        else
509
                name = argv0;
510

    
511
        if (status != 0) {
512
                fprintf(stderr, "Try `%s -h' for more information.\n",
513
                        name);
514
        } else {
515
                printf("Version : %s\n", VERSION);
516
                printf("Usage   : %s [options]\n", name);
517
                printf("Daemon to balance IRQs.\n");
518
                printf("Options :\n");
519
                printf("\t-h, --help Print this help.\n");
520
                printf("\t-d, --debug Debug mode. Don't daemonize.\n");
521
                printf("\t-v, --verbose Be verbose.\n");
522
                printf("\t-r, --ht Enable Hyper Threading.\n");
523
                printf("\t-p <path>, --pid=<path> File to save daemon's PID to (" BIRQ_PIDFILE ").\n");
524
                printf("\t-c <path>, --conf=<path> Config file (" BIRQ_CFGFILE ").\n");
525
                printf("\t-x <path>, --pxm=<path> Proximity config file.\n");
526
                printf("\t-O, --facility Syslog facility (DAEMON).\n");
527
                printf("\t-t <float>, --threshold=<float> Threshold to consider CPU is overloaded, in percents. Default threhold is %.2f.\n",
528
                        BIRQ_DEFAULT_THRESHOLD);
529
                printf("\t-l <float>, --load-limit=<float> Don't move IRQs to CPUs loaded more than this limit, in percents. Default limit is %.2f.\n",
530
                        BIRQ_DEFAULT_LOAD_LIMIT);
531
                printf("\t-i <sec>, --short-interval=<sec> Short iteration interval.\n");
532
                printf("\t-I <sec>, --long-interval=<sec> Long iteration interval.\n");
533
                printf("\t-s <strategy>, --strategy=<strategy> Strategy to choose IRQ to move (min/max/rnd).\n");
534
        }
535
}
536

    
537
/*--------------------------------------------------------- */
538
/* Parse config file */
539
static int parse_config(const char *fname, struct options *opts)
540
{
541
        lub_ini_t *ini;
542
        const char *tmp = NULL;
543

    
544
        ini = lub_ini_new();
545
        if (lub_ini_parse_file(ini, opts->cfgfile)) {
546
                lub_ini_free(ini);
547
                return -1;
548
        }
549

    
550
        if ((tmp = lub_ini_find(ini, "strategy")))
551
                if (opt_parse_strategy(tmp, &opts->strategy) < 0)
552
                        goto err;
553

    
554
        if ((tmp = lub_ini_find(ini, "threshold")))
555
                if (opt_parse_threshold(tmp, &opts->threshold))
556
                        goto err;
557

    
558
        if ((tmp = lub_ini_find(ini, "load-limit")))
559
                if (opt_parse_threshold(tmp, &opts->load_limit))
560
                        goto err;
561

    
562
        if ((tmp = lub_ini_find(ini, "short-interval")))
563
                if (opt_parse_interval(tmp, &opts->short_interval))
564
                        goto err;
565

    
566
        if ((tmp = lub_ini_find(ini, "long-interval")))
567
                if (opt_parse_interval(tmp, &opts->long_interval))
568
                        goto err;
569

    
570
        if ((tmp = lub_ini_find(ini, "exclude-cpus")))
571
                if (cpumask_parse_user(tmp, strlen(tmp), opts->exclude_cpus)) {
572
                        fprintf(stderr, "Error: Can't parse exclude-cpu option \"%s\".\n", tmp);
573
                        goto err;
574
                }
575

    
576
        return 0;
577
err:
578
        lub_ini_free(ini);
579
        return -1;
580
}