8.4 使用 RDMA_CM 和 IBV verbs 的代码

mckey.c (show/hide)

  1/*
  2 * BUILD COMMAND:
  3 * gcc -g -Wall -D_GNU_SOURCE -g -O2 -o mckey mckey.c -libverbs -lrdmacm
  4 *
  5 * $Id$
  6 */
  7#include <stdlib.h>
  8#include <string.h>
  9#include <stdio.h>
 10#include <errno.h>
 11#include <sys/types.h>
 12#include <netinet/in.h>
 13#include <arpa/inet.h>
 14#include <sys/socket.h>
 15#include <netdb.h>
 16#include <byteswap.h>
 17#include <unistd.h>
 18#include <getopt.h>
 19#include <rdma/rdma_cma.h>
 20struct cmatest_node
 21{
 22    int id;
 23    struct rdma_cm_id *cma_id;
 24    int connected;
 25    struct ibv_pd *pd;
 26    struct ibv_cq *cq;
 27    struct ibv_mr *mr;
 28    struct ibv_ah *ah;
 29    uint32_t remote_qpn;
 30    uint32_t remote_qkey;
 31    void *mem;
 32};
 33struct cmatest
 34{
 35    struct rdma_event_channel *channel;
 36    struct cmatest_node *nodes;
 37    int conn_index;
 38    int connects_left;
 39    struct sockaddr_in6 dst_in;
 40    struct sockaddr *dst_addr;
 41    struct sockaddr_in6 src_in;
 42    struct sockaddr *src_addr;
 43};
 44static struct cmatest test;
 45static int connections = 1;
 46static int message_size = 100;
 47static int message_count = 10;
 48static int is_sender;
 49static int unmapped_addr;
 50static char *dst_addr;
 51static char *src_addr;
 52static enum rdma_port_space port_space = RDMA_PS_UDP;
 53static int create_message(struct cmatest_node *node)
 54{
 55    if (!message_size)
 56        message_count = 0;
 57    if (!message_count)
 58        return 0;
 59    node->mem = malloc(message_size + sizeof(struct ibv_grh));
 60    if (!node->mem)
 61    {
 62        printf("failed message allocation\n");
 63        return -1;
 64    }
 65    node->mr = ibv_reg_mr(node->pd, node->mem, message_size + sizeof(struct ibv_grh),
 66                          IBV_ACCESS_LOCAL_WRITE);
 67    if (!node->mr)
 68    {
 69        printf("failed to reg MR\n");
 70        goto err;
 71    }
 72    return 0;
 73err:
 74    free(node->mem);
 75    return -1;
 76}
 77static int verify_test_params(struct cmatest_node *node)
 78{
 79    struct ibv_port_attr port_attr;
 80    int ret;
 81    ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num, &port_attr);
 82    if (ret)
 83        return ret;
 84    if (message_count && message_size > (1 << (port_attr.active_mtu + 7)))
 85    {
 86        printf("mckey: message_size %d is larger than active mtu %d\n", message_size, 1 << (port_attr.active_mtu + 7));
 87        return -EINVAL;
 88    }
 89    return 0;
 90}
 91static int init_node(struct cmatest_node *node)
 92{
 93    struct ibv_qp_init_attr init_qp_attr;
 94    int cqe, ret;
 95    node->pd = ibv_alloc_pd(node->cma_id->verbs);
 96    if (!node->pd)
 97    {
 98        ret = -ENOMEM;
 99        printf("mckey: unable to allocate PD\n");
100        goto out;
101    }
102    cqe = message_count ? message_count * 2 : 2;
103    node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
104    if (!node->cq)
105    {
106        ret = -ENOMEM;
107        printf("mckey: unable to create CQ\n");
108        goto out;
109    }
110    memset(&init_qp_attr, 0, sizeof init_qp_attr);
111    init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
112    init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
113    init_qp_attr.cap.max_send_sge = 1;
114    init_qp_attr.cap.max_recv_sge = 1;
115    init_qp_attr.qp_context = node;
116    init_qp_attr.sq_sig_all = 0;
117    init_qp_attr.qp_type = IBV_QPT_UD;
118    init_qp_attr.send_cq = node->cq;
119    init_qp_attr.recv_cq = node->cq;
120    ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
121    if (ret)
122    {
123        printf("mckey: unable to create QP: %d\n", ret);
124        goto out;
125    }
126    ret = create_message(node);
127    if (ret)
128    {
129        printf("mckey: failed to create messages: %d\n", ret);
130        goto out;
131    }
132out:
133    return ret;
134}
135static int post_recvs(struct cmatest_node *node)
136{
137    struct ibv_recv_wr recv_wr, *recv_failure;
138    struct ibv_sge sge;
139    int i, ret = 0;
140    if (!message_count)
141        return 0;
142    recv_wr.next = NULL;
143    recv_wr.sg_list = &sge;
144    recv_wr.num_sge = 1;
145    recv_wr.wr_id = (uintptr_t)node;
146    sge.length = message_size + sizeof(struct ibv_grh);
147    sge.lkey = node->mr->lkey;
148    sge.addr = (uintptr_t)node->mem;
149    for (i = 0; i < message_count && !ret; i++)
150    {
151        ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
152        if (ret)
153        {
154            printf("failed to post receives: %d\n", ret);
155            break;
156        }
157    }
158    return ret;
159}
160static int post_sends(struct cmatest_node *node, int signal_flag)
161{
162    struct ibv_send_wr send_wr, *bad_send_wr;
163    struct ibv_sge sge;
164    int i, ret = 0;
165    if (!node->connected || !message_count)
166        return 0;
167    send_wr.next = NULL;
168    send_wr.sg_list = &sge;
169    send_wr.num_sge = 1;
170    send_wr.opcode = IBV_WR_SEND_WITH_IMM;
171    send_wr.send_flags = signal_flag;
172    send_wr.wr_id = (unsigned long)node;
173    send_wr.imm_data = htonl(node->cma_id->qp->qp_num);
174    send_wr.wr.ud.ah = node->ah;
175    send_wr.wr.ud.remote_qpn = node->remote_qpn;
176    send_wr.wr.ud.remote_qkey = node->remote_qkey;
177    sge.length = message_size;
178    sge.lkey = node->mr->lkey;
179    sge.addr = (uintptr_t)node->mem;
180    for (i = 0; i < message_count && !ret; i++)
181    {
182        ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
183        if (ret)
184            printf("failed to post sends: %d\n", ret);
185    }
186    return ret;
187}
188static void connect_error(void)
189{
190    test.connects_left--;
191}
192static int addr_handler(struct cmatest_node *node)
193{
194    int ret;
195    ret = verify_test_params(node);
196    if (ret)
197        goto err;
198    ret = init_node(node);
199    if (ret)
200        goto err;
201    if (!is_sender)
202    {
203        ret = post_recvs(node);
204        if (ret)
205            goto err;
206    }
207    ret = rdma_join_multicast(node->cma_id, test.dst_addr, node);
208    if (ret)
209    {
210        printf("mckey: failure joining: %d\n", ret);
211        goto err;
212    }
213    return 0;
214err:
215    connect_error();
216    return ret;
217}
218static int join_handler(struct cmatest_node *node, struct rdma_ud_param *param)
219{
220    char buf[40];
221    inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
222    printf("mckey: joined dgid: %s\n", buf);
223    node->remote_qpn = param->qp_num;
224    node->remote_qkey = param->qkey;
225    node->ah = ibv_create_ah(node->pd, &param->ah_attr);
226    if (!node->ah)
227    {
228        printf("mckey: failure creating address handle\n");
229        goto err;
230    }
231    node->connected = 1;
232    test.connects_left--;
233    return 0;
234err:
235    connect_error();
236    return -1;
237}
238static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
239{
240    int ret = 0;
241    switch (event->event)
242    {
243    case RDMA_CM_EVENT_ADDR_RESOLVED:
244        ret = addr_handler(cma_id->context);
245        break;
246    case RDMA_CM_EVENT_MULTICAST_JOIN:
247        ret = join_handler(cma_id->context, &event->param.ud);
248        break;
249    case RDMA_CM_EVENT_ADDR_ERROR:
250    case RDMA_CM_EVENT_ROUTE_ERROR:
251    case RDMA_CM_EVENT_MULTICAST_ERROR:
252        printf("mckey: event: %s, error: %d\n", rdma_event_str(event->event), event->status);
253        connect_error();
254        ret = event->status;
255        break;
256    case RDMA_CM_EVENT_DEVICE_REMOVAL:
257        /* Cleanup will occur after test completes. */
258        break;
259    default:
260        break;
261    }
262    return ret;
263}
264static void destroy_node(struct cmatest_node *node)
265{
266    if (!node->cma_id)
267        return;
268    if (node->ah)
269        ibv_destroy_ah(node->ah);
270    if (node->cma_id->qp)
271        rdma_destroy_qp(node->cma_id);
272    if (node->cq)
273        ibv_destroy_cq(node->cq);
274    if (node->mem)
275    {
276        ibv_dereg_mr(node->mr);
277        free(node->mem);
278    }
279    if (node->pd)
280        ibv_dealloc_pd(node->pd);
281    /* Destroy the RDMA ID after all device resources */
282    rdma_destroy_id(node->cma_id);
283}
284static int alloc_nodes(void)
285{
286    int ret, i;
287    test.nodes = malloc(sizeof *test.nodes * connections);
288    if (!test.nodes)
289    {
290        printf("mckey: unable to allocate memory for test nodes\n");
291        return -ENOMEM;
292    }
293    memset(test.nodes, 0, sizeof *test.nodes * connections);
294    for (i = 0; i < connections; i++)
295    {
296        test.nodes[i].id = i;
297        ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, &test.nodes[i], port_space);
298        if (ret)
299            goto err;
300    }
301    return 0;
302err:
303    while (--i >= 0)
304        rdma_destroy_id(test.nodes[i].cma_id);
305    free(test.nodes);
306    return ret;
307}
308static void destroy_nodes(void)
309{
310    int i;
311    for (i = 0; i < connections; i++)
312        destroy_node(&test.nodes[i]);
313    free(test.nodes);
314}
315static int poll_cqs(void)
316{
317    struct ibv_wc wc[8];
318    int done, i, ret;
319    for (i = 0; i < connections; i++)
320    {
321        if (!test.nodes[i].connected)
322            continue;
323        for (done = 0; done < message_count; done += ret)
324        {
325            ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
326            if (ret < 0)
327            {
328                printf("mckey: failed polling CQ: %d\n", ret);
329                return ret;
330            }
331        }
332    }
333    return 0;
334}
335static int connect_events(void)
336{
337    struct rdma_cm_event *event;
338    int ret = 0;
339    while (test.connects_left && !ret)
340    {
341        ret = rdma_get_cm_event(test.channel, &event);
342        if (!ret)
343        {
344            ret = cma_handler(event->id, event);
345            rdma_ack_cm_event(event);
346        }
347    }
348    return ret;
349}
/*
 * Resolve a host name or numeric address and copy the first result to addr.
 *
 * dst  - host name or address string passed to getaddrinfo().
 * addr - destination storage.  Both callers in this file pass a
 *        struct sockaddr_in6, so a result longer than that is rejected
 *        instead of being copied past the end of the storage.
 *
 * Returns 0 on success, the getaddrinfo() error code on resolution failure,
 * or EAI_FAMILY when the resolved address does not fit.
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
    struct addrinfo *res;
    int ret;

    ret = getaddrinfo(dst, NULL, NULL, &res);
    if (ret)
    {
        /* Include the resolver's own explanation of the failure. */
        printf("getaddrinfo failed (%s) - invalid hostname or IP address\n", gai_strerror(ret));
        return ret;
    }

    if (res->ai_addrlen > sizeof(struct sockaddr_in6))
    {
        printf("getaddrinfo returned an unsupported address length\n");
        ret = EAI_FAMILY;
    }
    else
    {
        memcpy(addr, res->ai_addr, res->ai_addrlen);
    }

    freeaddrinfo(res);
    return ret;
}
364static int run(void)
365{
366    int i, ret;
367    printf("mckey: starting %s\n", is_sender ? "client" : "server");
368    if (src_addr)
369    {
370        ret = get_addr(src_addr, (struct sockaddr *)&test.src_in);
371        if (ret)
372            return ret;
373    }
374    ret = get_addr(dst_addr, (struct sockaddr *)&test.dst_in);
375    if (ret)
376        return ret;
377    printf("mckey: joining\n");
378    for (i = 0; i < connections; i++)
379    {
380        if (src_addr)
381        {
382            ret = rdma_bind_addr(test.nodes[i].cma_id, test.src_addr);
383            if (ret)
384            {
385                printf("mckey: addr bind failure: %d\n", ret);
386                connect_error();
387                return ret;
388            }
389        }
390        if (unmapped_addr)
391            ret = addr_handler(&test.nodes[i]);
392        else
393            ret = rdma_resolve_addr(test.nodes[i].cma_id, test.src_addr, test.dst_addr,
394                                    2000);
395        if (ret)
396        {
397            printf("mckey: resolve addr failure: %d\n", ret);
398            connect_error();
399            return ret;
400        }
401    }
402    ret = connect_events();
403    if (ret)
404        goto out;
405    /*
406     * Pause to give SM chance to configure switches. We don't want to
407     * handle reliability issue in this simple test program.
408     */
409    sleep(3);
410    if (message_count)
411    {
412        if (is_sender)
413        {
414            printf("initiating data transfers\n");
415            for (i = 0; i < connections; i++)
416            {
417                ret = post_sends(&test.nodes[i], 0);
418                if (ret)
419                    goto out;
420            }
421        }
422        else
423        {
424            printf("receiving data transfers\n");
425            ret = poll_cqs();
426            if (ret)
427                goto out;
428        }
429        printf("data transfers complete\n");
430    }
431out:
432    for (i = 0; i < connections; i++)
433    {
434        ret = rdma_leave_multicast(test.nodes[i].cma_id, test.dst_addr);
435        if (ret)
436            printf("mckey: failure leaving: %d\n", ret);
437    }
438    return ret;
439}
440int main(int argc, char **argv)
441{
442    int op, ret;
443    while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1)
444    {
445        switch (op)
446        {
447        case 'm':
448            dst_addr = optarg;
449            break;
450        case 'M':
451            unmapped_addr = 1;
452            dst_addr = optarg;
453            break;
454        case 's':
455            is_sender = 1;
456            break;
457        case 'b':
458            src_addr = optarg;
459            test.src_addr = (struct sockaddr *)&test.src_in;
460            break;
461        case 'c':
462            connections = atoi(optarg);
463            break;
464        case 'C':
465            message_count = atoi(optarg);
466            break;
467        case 'S':
468            message_size = atoi(optarg);
469            break;
470        case 'p':
471            port_space = strtol(optarg, NULL, 0);
472            break;
473        default:
474            printf("usage: %s\n", argv[0]);
475            printf("\t-m multicast_address\n");
476            printf("\t[-M unmapped_multicast_address]\n"
477                   "\t replaces -m and requires -b\n");
478            printf("\t[-s(ender)]\n");
479            printf("\t[-b bind_address]\n");
480            printf("\t[-c connections]\n");
481            printf("\t[-C message_count]\n");
482            printf("\t[-S message_size]\n");
483            printf("\t[-p port_space - %#x for UDP (default), %#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
484            exit(1);
485        }
486    }
487    test.dst_addr = (struct sockaddr *)&test.dst_in;
488    test.connects_left = connections;
489    test.channel = rdma_create_event_channel();
490    if (!test.channel)
491    {
492        printf("failed to create event channel\n");
493        exit(1);
494    }
495    if (alloc_nodes())
496        exit(1);
497    ret = run();
498    printf("test complete\n");
499    destroy_nodes();
500    rdma_destroy_event_channel(test.channel);
501    printf("return status %d\n", ret);
502    return ret;
503}