Project

General

Profile

Statistics
| Branch: | Tag: | Revision:

birq / birq.c @ 4534af0a

History | View | Annotate | Download (14.1 KB)

1
/*
2
 * birq
3
 *
4
 * Balance IRQ
5
 *
6
 */
7

    
8
#ifdef HAVE_CONFIG_H
9
#include "config.h"
10
#endif /* HAVE_CONFIG_H */
11

    
12
#include <stdio.h>
13
#include <stdlib.h>
14
#include <unistd.h>
15
#include <sys/types.h>
16
#include <errno.h>
17
#include <assert.h>
18
#include <string.h>
19
#include <signal.h>
20
#include <syslog.h>
21
#include <fcntl.h>
22
#include <time.h>
23
#ifdef HAVE_GETOPT_H
24
#include <getopt.h>
25
#endif
26

    
27
#include "birq.h"
28
#include "lub/log.h"
29
#include "lub/list.h"
30
#include "lub/ini.h"
31
#include "irq.h"
32
#include "numa.h"
33
#include "cpu.h"
34
#include "statistics.h"
35
#include "balance.h"
36
#include "pxm.h"
37

    
38
#ifndef VERSION
39
#define VERSION "1.2.0"
40
#endif
41

    
42
/*
 * Signal handling state.
 *
 * The flags are written by the asynchronous signal handlers and read by
 * the main loop. They are declared volatile sig_atomic_t because that is
 * the object type the C standard guarantees can be safely assigned from
 * a signal handler.
 */
static volatile sig_atomic_t sigterm = 0; /* Exit if 1 */
static void sighandler(int signo);

static volatile sig_atomic_t sighup = 0; /* Re-read config file if 1 */
static void sighup_handler(int signo);

/* Forward declarations of the option handling helpers */
static void help(int status, const char *argv0);
static struct options *opts_init(void);
static void opts_free(struct options *opts);
static int opts_parse(int argc, char *argv[], struct options *opts);
static int parse_config(const char *fname, struct options *opts);
54

    
55
/* Command line options */
56
struct options {
57
        char *pidfile;
58
        char *cfgfile;
59
        int cfgfile_userdefined;
60
        char *pxm; /* Proximity config file */
61
        int debug; /* Don't daemonize in debug mode */
62
        int log_facility;
63
        float threshold;
64
        float load_limit;
65
        int verbose;
66
        int ht;
67
        unsigned int long_interval;
68
        unsigned int short_interval;
69
        birq_choose_strategy_e strategy;
70
        cpumask_t exclude_cpus;
71
};
72

    
73
/*--------------------------------------------------------- */
74
int main(int argc, char **argv)
75
{
76
        int retval = -1;
77
        struct options *opts = NULL;
78
        int pidfd = -1;
79
        unsigned int interval;
80

    
81
        /* Signal vars */
82
        struct sigaction sig_act;
83
        sigset_t sig_set;
84

    
85
        /* IRQ list. It contain all found IRQs. */
86
        lub_list_t *irqs;
87
        /* IRQs need to be balanced */
88
        lub_list_t *balance_irqs;
89
        /* CPU list. It contain all found CPUs. */
90
        lub_list_t *cpus;
91
        /* NUMA list. It contain all found NUMA nodes. */
92
        lub_list_t *numas;
93
        /* Proximity list. */
94
        lub_list_t *pxms;
95

    
96
        /* Parse command line options */
97
        opts = opts_init();
98
        if (opts_parse(argc, argv, opts))
99
                goto err;
100

    
101
        /* Parse config file */
102
        if (!access(opts->cfgfile, R_OK)) {
103
                if (parse_config(opts->cfgfile, opts))
104
                        goto err;
105
        } else if (opts->cfgfile_userdefined) {
106
                fprintf(stderr, "Error: Can't find config file %s\n",
107
                        opts->cfgfile);
108
                goto err;
109
        }
110

    
111
        /* Validate threshold and load limit */
112
        if (opts->load_limit > opts->threshold) {
113
                fprintf(stderr, "Error: The load limit is greater than threshold.\n");
114
                goto err;
115
        }
116

    
117
        /* Initialize syslog */
118
        openlog(argv[0], LOG_CONS, opts->log_facility);
119
        syslog(LOG_ERR, "Start daemon.\n");
120

    
121
        /* Fork the daemon */
122
        if (!opts->debug) {
123
                /* Daemonize */
124
                if (daemon(0, 0) < 0) {
125
                        syslog(LOG_ERR, "Can't daemonize\n");
126
                        goto err;
127
                }
128

    
129
                /* Write pidfile */
130
                if ((pidfd = open(opts->pidfile,
131
                        O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
132
                        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
133
                        syslog(LOG_WARNING, "Can't open pidfile %s: %s\n",
134
                                opts->pidfile, strerror(errno));
135
                } else {
136
                        char str[20];
137
                        snprintf(str, sizeof(str), "%u\n", getpid());
138
                        str[sizeof(str) - 1] = '\0';
139
                        if (write(pidfd, str, strlen(str)) < 0)
140
                                syslog(LOG_WARNING, "Can't write to %s: %s\n",
141
                                        opts->pidfile, strerror(errno));
142
                        close(pidfd);
143
                }
144
        }
145

    
146
        /* Set signal handler */
147
        sigemptyset(&sig_set);
148
        sigaddset(&sig_set, SIGTERM);
149
        sigaddset(&sig_set, SIGINT);
150
        sigaddset(&sig_set, SIGQUIT);
151

    
152
        sig_act.sa_flags = 0;
153
        sig_act.sa_mask = sig_set;
154
        sig_act.sa_handler = &sighandler;
155
        sigaction(SIGTERM, &sig_act, NULL);
156
        sigaction(SIGINT, &sig_act, NULL);
157
        sigaction(SIGQUIT, &sig_act, NULL);
158

    
159
        /* SIGHUP handler */
160
        sigemptyset(&sig_set);
161
        sigaddset(&sig_set, SIGHUP);
162

    
163
        sig_act.sa_flags = 0;
164
        sig_act.sa_mask = sig_set;
165
        sig_act.sa_handler = &sighup_handler;
166
        sigaction(SIGHUP, &sig_act, NULL);
167

    
168
        /* Randomize */
169
        srand(time(NULL));
170

    
171
        /* Scan NUMA nodes */
172
        numas = lub_list_new(numa_list_compare);
173
        scan_numas(numas);
174
        if (opts->verbose)
175
                show_numas(numas);
176

    
177
        /* Scan CPUs */
178
        cpus = lub_list_new(cpu_list_compare);
179
        scan_cpus(cpus, opts->ht);
180
        if (opts->verbose)
181
                show_cpus(cpus);
182

    
183
        /* Prepare data structures */
184
        irqs = lub_list_new(irq_list_compare);
185
        balance_irqs = lub_list_new(irq_list_compare);
186

    
187
        /* Parse proximity file */
188
        pxms = lub_list_new(NULL);
189
        if (opts->pxm)
190
                parse_pxm_config(opts->pxm, pxms, numas);
191
        if (opts->verbose)
192
                show_pxms(pxms);
193

    
194
        /* Main loop */
195
        while (!sigterm) {
196
                lub_list_node_t *node;
197
                char outstr[10];
198
                time_t t;
199
                struct tm *tmp;
200

    
201
                t = time(NULL);
202
                tmp = localtime(&t);
203
                if (tmp) {
204
                        strftime(outstr, sizeof(outstr), "%H:%M:%S", tmp);
205
                        printf("----[ %s ]----------------------------------------------------------------\n", outstr);
206
                }
207

    
208
                /* Re-read config file on SIGHUP */
209
                if (sighup) {
210
                        if (!access(opts->cfgfile, R_OK)) {
211
                                syslog(LOG_ERR, "Re-reading config file\n");
212
                                if (parse_config(opts->cfgfile, opts))
213
                                        syslog(LOG_ERR, "Error while config file parsing.\n");
214
                        } else if (opts->cfgfile_userdefined)
215
                                syslog(LOG_ERR, "Can't find config file.\n");
216
                        sighup = 0;
217
                }
218

    
219
                /* Rescan PCI devices for new IRQs. */
220
                scan_irqs(irqs, balance_irqs, pxms);
221
                if (opts->verbose)
222
                        irq_list_show(irqs);
223
                /* Link IRQs to CPUs due to real current smp affinity. */
224
                link_irqs_to_cpus(cpus, irqs);
225

    
226
                /* Gather statistics on CPU load and number of interrupts. */
227
                gather_statistics(cpus, irqs);
228
                show_statistics(cpus, opts->verbose);
229
                /* Choose IRQ to move to another CPU. */
230
                choose_irqs_to_move(cpus, balance_irqs,
231
                        opts->threshold, opts->strategy, &opts->exclude_cpus);
232

    
233
                /* Balance IRQs */
234
                if (lub_list_len(balance_irqs) != 0) {
235
                        /* Set short interval to make balancing faster. */
236
                        interval = opts->short_interval;
237
                        /* Choose new CPU for IRQs need to be balanced. */
238
                        balance(cpus, balance_irqs, opts->load_limit,
239
                                &opts->exclude_cpus);
240
                        /* Write new values to /proc/irq/<IRQ>/smp_affinity */
241
                        apply_affinity(balance_irqs);
242
                        /* Free list of balanced IRQs */
243
                        while ((node = lub_list__get_tail(balance_irqs))) {
244
                                lub_list_del(balance_irqs, node);
245
                                lub_list_node_free(node);
246
                        }
247
                } else {
248
                        /* If nothing to balance */
249
                        interval = opts->long_interval;
250
                }
251
                
252
                /* Wait before next iteration */
253
                sleep(interval);
254
        }
255

    
256
        /* Free data structures */
257
        irq_list_free(irqs);
258
        lub_list_free(balance_irqs);
259
        cpu_list_free(cpus);
260
        numa_list_free(numas);
261
        pxm_list_free(pxms);
262

    
263
        retval = 0;
264
err:
265
        /* Remove pidfile */
266
        if (pidfd >= 0) {
267
                if (unlink(opts->pidfile) < 0) {
268
                        syslog(LOG_ERR, "Can't remove pid-file %s: %s\n",
269
                        opts->pidfile, strerror(errno));
270
                }
271
        }
272

    
273
        /* Free command line options */
274
        opts_free(opts);
275
        syslog(LOG_ERR, "Stop daemon.\n");
276

    
277
        return retval;
278
}
279

    
280
/*--------------------------------------------------------- */
281
/* Signal handler for temination signals (like SIGTERM, SIGINT, ...) */
282
static void sighandler(int signo)
283
{
284
        sigterm = 1;
285
        signo = signo; /* Happy compiler */
286
}
287

    
288
/*--------------------------------------------------------- */
289
/* Re-read config file on SIGHUP */
290
static void sighup_handler(int signo)
291
{
292
        sighup = 1;
293
        signo = signo; /* Happy compiler */
294
}
295

    
296
/*--------------------------------------------------------- */
297
/* Initialize option structure by defaults */
298
static struct options *opts_init(void)
299
{
300
        struct options *opts = NULL;
301

    
302
        opts = malloc(sizeof(*opts));
303
        assert(opts);
304
        opts->debug = 0; /* daemonize by default */
305
        opts->pidfile = strdup(BIRQ_PIDFILE);
306
        opts->cfgfile = strdup(BIRQ_CFGFILE);
307
        opts->cfgfile_userdefined = 0;
308
        opts->pxm = NULL;
309
        opts->log_facility = LOG_DAEMON;
310
        opts->threshold = BIRQ_DEFAULT_THRESHOLD;
311
        opts->load_limit = BIRQ_DEFAULT_LOAD_LIMIT;
312
        opts->verbose = 0;
313
        opts->ht = 0;
314
        opts->long_interval = BIRQ_LONG_INTERVAL;
315
        opts->short_interval = BIRQ_SHORT_INTERVAL;
316
        opts->strategy = BIRQ_CHOOSE_RND;
317
        cpus_init(opts->exclude_cpus);
318
        cpus_clear(opts->exclude_cpus);
319

    
320
        return opts;
321
}
322

    
323
/*--------------------------------------------------------- */
324
/* Free option structure */
325
static void opts_free(struct options *opts)
326
{
327
        if (opts->pidfile)
328
                free(opts->pidfile);
329
        if (opts->cfgfile)
330
                free(opts->cfgfile);
331
        if (opts->pxm)
332
                free(opts->pxm);
333
        cpus_free(opts->exclude_cpus);
334
        free(opts);
335
}
336

    
337
/* Parse 'strategy' option */
338
static int opt_parse_strategy(const char *optarg, birq_choose_strategy_e *strategy)
339
{
340
        assert(optarg);
341
        assert(strategy);
342

    
343
        if (!strcmp(optarg, "max"))
344
                *strategy = BIRQ_CHOOSE_MAX;
345
        else if (!strcmp(optarg, "min"))
346
                *strategy = BIRQ_CHOOSE_MIN;
347
        else if (!strcmp(optarg, "rnd"))
348
                *strategy = BIRQ_CHOOSE_RND;
349
        else {
350
                fprintf(stderr, "Error: Illegal strategy value %s.\n", optarg);
351
                return -1;
352
        }
353
        return 0;
354
}
355

    
356
/*
 * Parse 'threshold' and 'load-limit' options.
 *
 * The value is a percentage, so it must be a number within [0, 100].
 * Trailing whitespace is tolerated; any other trailing character makes
 * the value illegal.
 *
 * Returns 0 and stores the value to *threshold on success. On failure an
 * error is printed to stderr, *threshold is left untouched and -1 is
 * returned.
 */
static int opt_parse_threshold(const char *optarg, float *threshold)
{
	char *endptr = NULL;
	float thresh;

	assert(optarg);
	assert(threshold);

	thresh = strtof(optarg, &endptr);
	/* Something was converted: allow only whitespace after it */
	if (endptr != optarg)
		endptr += strspn(endptr, " \t\r\n");
	if (endptr == optarg || *endptr != '\0') {
		fprintf(stderr, "Error: Illegal threshold/load-limit value %s.\n", optarg);
		return -1;
	}
	if (thresh > 100.00) {
		fprintf(stderr, "Error: The threshold/load-limit value %s > 100.\n", optarg);
		return -1;
	}
	/* Written as a negated ">=" so that NaN is rejected as well */
	if (!(thresh >= 0.0)) {
		fprintf(stderr, "Error: The threshold/load-limit value %s is negative or not a number.\n", optarg);
		return -1;
	}
	*threshold = thresh;
	return 0;
}
377

    
378
/*
 * Parse 'short-interval' and 'long-interval' options.
 *
 * The value is a non-negative decimal number of seconds that must fit
 * into an unsigned int. Trailing whitespace is tolerated; any other
 * trailing character makes the value illegal.
 *
 * Returns 0 and stores the value to *interval on success. On failure an
 * error is printed to stderr, *interval is left untouched and -1 is
 * returned.
 */
static int opt_parse_interval(const char *optarg, unsigned int *interval)
{
	const char *first;
	char *endptr = NULL;
	unsigned long int val;

	assert(optarg);
	assert(interval);

	/* strtoul() accepts "-N" and returns the negated (wrapped) value,
	 * so a leading minus sign has to be refused explicitly. */
	first = optarg + strspn(optarg, " \t\n\v\f\r");

	errno = 0;
	val = strtoul(optarg, &endptr, 10);
	/* Something was converted: allow only whitespace after it */
	if (endptr != optarg)
		endptr += strspn(endptr, " \t\r\n");
	if (*first == '-' || endptr == optarg || *endptr != '\0') {
		fprintf(stderr, "Error: Illegal interval value %s.\n", optarg);
		return -1;
	}
	/* Out of range for unsigned long, or lost by the narrowing below */
	if (errno == ERANGE || val != (unsigned long int)(unsigned int)val) {
		fprintf(stderr, "Error: The interval value %s is too large.\n", optarg);
		return -1;
	}
	*interval = (unsigned int)val;
	return 0;
}
395

    
396
/*--------------------------------------------------------- */
397
/* Parse command line options */
398
static int opts_parse(int argc, char *argv[], struct options *opts)
399
{
400
        static const char *shortopts = "hp:c:dO:t:l:vri:I:s:x:";
401
#ifdef HAVE_GETOPT_H
402
        static const struct option longopts[] = {
403
                {"help",                0, NULL, 'h'},
404
                {"pid",                        1, NULL, 'p'},
405
                {"conf",                1, NULL, 'c'},
406
                {"debug",                0, NULL, 'd'},
407
                {"facility",                1, NULL, 'O'},
408
                {"threshold",                1, NULL, 't'},
409
                {"load-limit",                1, NULL, 't'},
410
                {"verbose",                0, NULL, 'v'},
411
                {"ht",                        0, NULL, 'r'},
412
                {"short-interval",        1, NULL, 'i'},
413
                {"long-interval",        1, NULL, 'I'},
414
                {"strategy",                1, NULL, 's'},
415
                {"pxm",                        1, NULL, 'x'},
416
                {NULL,                        0, NULL, 0}
417
        };
418
#endif
419
        optind = 1;
420
        while(1) {
421
                int opt;
422
#ifdef HAVE_GETOPT_H
423
                opt = getopt_long(argc, argv, shortopts, longopts, NULL);
424
#else
425
                opt = getopt(argc, argv, shortopts);
426
#endif
427
                if (-1 == opt)
428
                        break;
429
                switch (opt) {
430
                case 'p':
431
                        if (opts->pidfile)
432
                                free(opts->pidfile);
433
                        opts->pidfile = strdup(optarg);
434
                        break;
435
                case 'c':
436
                        if (opts->cfgfile)
437
                                free(opts->cfgfile);
438
                        opts->cfgfile = strdup(optarg);
439
                        opts->cfgfile_userdefined = 1;
440
                        break;
441
                case 'x':
442
                        if (opts->pxm)
443
                                free(opts->pxm);
444
                        opts->pxm = strdup(optarg);
445
                        break;
446
                case 'd':
447
                        opts->debug = 1;
448
                        break;
449
                case 'v':
450
                        opts->verbose = 1;
451
                        break;
452
                case 'r':
453
                        opts->ht = 1;
454
                        break;
455
                case 'O':
456
                        if (lub_log_facility(optarg, &(opts->log_facility))) {
457
                                fprintf(stderr, "Error: Illegal syslog facility %s.\n", optarg);
458
                                exit(-1);
459
                        }
460
                        break;
461
                case 't':
462
                        if (opt_parse_threshold(optarg, &opts->threshold))
463
                                exit(-1);
464
                        break;
465
                case 'l':
466
                        if (opt_parse_threshold(optarg, &opts->load_limit))
467
                                exit(-1);
468
                        break;
469
                case 'i':
470
                        if (opt_parse_interval(optarg, &opts->short_interval))
471
                                exit(-1);
472
                        break;
473
                case 'I':
474
                        if (opt_parse_interval(optarg, &opts->long_interval))
475
                                exit(-1);
476
                        break;
477
                case 's':
478
                        if (opt_parse_strategy(optarg, &opts->strategy) < 0)
479
                                exit(-1);
480
                        break;
481
                case 'h':
482
                        help(0, argv[0]);
483
                        exit(0);
484
                        break;
485
                default:
486
                        help(-1, argv[0]);
487
                        exit(-1);
488
                        break;
489
                }
490
        }
491

    
492

    
493
        return 0;
494
}
495

    
496
/*--------------------------------------------------------- */
497
/* Print help message */
498
static void help(int status, const char *argv0)
499
{
500
        const char *name = NULL;
501

    
502
        if (!argv0)
503
                return;
504

    
505
        /* Find the basename */
506
        name = strrchr(argv0, '/');
507
        if (name)
508
                name++;
509
        else
510
                name = argv0;
511

    
512
        if (status != 0) {
513
                fprintf(stderr, "Try `%s -h' for more information.\n",
514
                        name);
515
        } else {
516
                printf("Version : %s\n", VERSION);
517
                printf("Usage   : %s [options]\n", name);
518
                printf("Daemon to balance IRQs.\n");
519
                printf("Options :\n");
520
                printf("\t-h, --help Print this help.\n");
521
                printf("\t-d, --debug Debug mode. Don't daemonize.\n");
522
                printf("\t-v, --verbose Be verbose.\n");
523
                printf("\t-r, --ht Enable Hyper Threading.\n");
524
                printf("\t-p <path>, --pid=<path> File to save daemon's PID to (" BIRQ_PIDFILE ").\n");
525
                printf("\t-c <path>, --conf=<path> Config file (" BIRQ_CFGFILE ").\n");
526
                printf("\t-x <path>, --pxm=<path> Proximity config file.\n");
527
                printf("\t-O, --facility Syslog facility (DAEMON).\n");
528
                printf("\t-t <float>, --threshold=<float> Threshold to consider CPU is overloaded, in percents. Default threhold is %.2f.\n",
529
                        BIRQ_DEFAULT_THRESHOLD);
530
                printf("\t-l <float>, --load-limit=<float> Don't move IRQs to CPUs loaded more than this limit, in percents. Default limit is %.2f.\n",
531
                        BIRQ_DEFAULT_LOAD_LIMIT);
532
                printf("\t-i <sec>, --short-interval=<sec> Short iteration interval.\n");
533
                printf("\t-I <sec>, --long-interval=<sec> Long iteration interval.\n");
534
                printf("\t-s <strategy>, --strategy=<strategy> Strategy to choose IRQ to move (min/max/rnd).\n");
535
        }
536
}
537

    
538
/*--------------------------------------------------------- */
539
/* Parse config file */
540
static int parse_config(const char *fname, struct options *opts)
541
{
542
        int ret = -1; /* Pessimistic retval */
543
        lub_ini_t *ini;
544
        const char *tmp = NULL;
545

    
546
        ini = lub_ini_new();
547
        if (lub_ini_parse_file(ini, opts->cfgfile)) {
548
                lub_ini_free(ini);
549
                return -1;
550
        }
551

    
552
        if ((tmp = lub_ini_find(ini, "strategy")))
553
                if (opt_parse_strategy(tmp, &opts->strategy) < 0)
554
                        goto err;
555

    
556
        if ((tmp = lub_ini_find(ini, "threshold")))
557
                if (opt_parse_threshold(tmp, &opts->threshold))
558
                        goto err;
559

    
560
        if ((tmp = lub_ini_find(ini, "load-limit")))
561
                if (opt_parse_threshold(tmp, &opts->load_limit))
562
                        goto err;
563

    
564
        if ((tmp = lub_ini_find(ini, "short-interval")))
565
                if (opt_parse_interval(tmp, &opts->short_interval))
566
                        goto err;
567

    
568
        if ((tmp = lub_ini_find(ini, "long-interval")))
569
                if (opt_parse_interval(tmp, &opts->long_interval))
570
                        goto err;
571

    
572
        if ((tmp = lub_ini_find(ini, "exclude-cpus")))
573
                if (cpumask_parse_user(tmp, strlen(tmp), opts->exclude_cpus)) {
574
                        fprintf(stderr, "Error: Can't parse exclude-cpu option \"%s\".\n", tmp);
575
                        goto err;
576
                }
577

    
578
        ret = 0;
579
err:
580
        lub_ini_free(ini);
581
        return ret;
582
}