8.4 使用 RDMA_CM 和 IBV verbs 的代码
mckey.c (show/hide)
/*
 * BUILD COMMAND:
 * gcc -g -Wall -D_GNU_SOURCE -g -O2 -o mckey mckey.c -libverbs -lrdmacm
 *
 * $Id$
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netdb.h>
#include <byteswap.h>
#include <unistd.h>
#include <getopt.h>
#include <rdma/rdma_cma.h>
/*
 * Per-connection state: one RDMA CM identifier together with the verbs
 * objects and the message buffer that belong to it.
 */
struct cmatest_node {
	int			id;		/* index of this node in test.nodes */
	struct rdma_cm_id	*cma_id;	/* CM id; created with this node as its context */
	int			connected;	/* set to 1 once the multicast join completed */
	struct ibv_pd		*pd;		/* protection domain for cq/qp/mr/ah */
	struct ibv_cq		*cq;		/* used as both send and receive CQ */
	struct ibv_mr		*mr;		/* registration covering mem */
	struct ibv_ah		*ah;		/* address handle built from the join event */
	uint32_t		remote_qpn;	/* QP number reported by the join event */
	uint32_t		remote_qkey;	/* Q_Key reported by the join event */
	void			*mem;		/* message buffer (payload plus GRH room) */
};
/*
 * Global state of one test run: the CM event channel, the node array and
 * the resolved source/destination addresses.
 */
struct cmatest {
	struct rdma_event_channel	*channel;	/* event channel shared by all nodes */
	struct cmatest_node		*nodes;		/* array of `connections` nodes */
	int				conn_index;	/* not referenced by the code in this file */
	int				connects_left;	/* nodes whose join has neither completed nor failed */
	struct sockaddr_in6		dst_in;		/* storage for the multicast address */
	struct sockaddr			*dst_addr;	/* points at dst_in */
	struct sockaddr_in6		src_in;		/* storage for the bind address */
	struct sockaddr			*src_addr;	/* points at src_in when -b was given, else NULL */
};
44static struct cmatest test;
45static int connections = 1;
46static int message_size = 100;
47static int message_count = 10;
48static int is_sender;
49static int unmapped_addr;
50static char *dst_addr;
51static char *src_addr;
52static enum rdma_port_space port_space = RDMA_PS_UDP;
53static int create_message(struct cmatest_node *node)
54{
55 if (!message_size)
56 message_count = 0;
57 if (!message_count)
58 return 0;
59 node->mem = malloc(message_size + sizeof(struct ibv_grh));
60 if (!node->mem)
61 {
62 printf("failed message allocation\n");
63 return -1;
64 }
65 node->mr = ibv_reg_mr(node->pd, node->mem, message_size + sizeof(struct ibv_grh),
66 IBV_ACCESS_LOCAL_WRITE);
67 if (!node->mr)
68 {
69 printf("failed to reg MR\n");
70 goto err;
71 }
72 return 0;
73err:
74 free(node->mem);
75 return -1;
76}
77static int verify_test_params(struct cmatest_node *node)
78{
79 struct ibv_port_attr port_attr;
80 int ret;
81 ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num, &port_attr);
82 if (ret)
83 return ret;
84 if (message_count && message_size > (1 << (port_attr.active_mtu + 7)))
85 {
86 printf("mckey: message_size %d is larger than active mtu %d\n", message_size, 1 << (port_attr.active_mtu + 7));
87 return -EINVAL;
88 }
89 return 0;
90}
91static int init_node(struct cmatest_node *node)
92{
93 struct ibv_qp_init_attr init_qp_attr;
94 int cqe, ret;
95 node->pd = ibv_alloc_pd(node->cma_id->verbs);
96 if (!node->pd)
97 {
98 ret = -ENOMEM;
99 printf("mckey: unable to allocate PD\n");
100 goto out;
101 }
102 cqe = message_count ? message_count * 2 : 2;
103 node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
104 if (!node->cq)
105 {
106 ret = -ENOMEM;
107 printf("mckey: unable to create CQ\n");
108 goto out;
109 }
110 memset(&init_qp_attr, 0, sizeof init_qp_attr);
111 init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
112 init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
113 init_qp_attr.cap.max_send_sge = 1;
114 init_qp_attr.cap.max_recv_sge = 1;
115 init_qp_attr.qp_context = node;
116 init_qp_attr.sq_sig_all = 0;
117 init_qp_attr.qp_type = IBV_QPT_UD;
118 init_qp_attr.send_cq = node->cq;
119 init_qp_attr.recv_cq = node->cq;
120 ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
121 if (ret)
122 {
123 printf("mckey: unable to create QP: %d\n", ret);
124 goto out;
125 }
126 ret = create_message(node);
127 if (ret)
128 {
129 printf("mckey: failed to create messages: %d\n", ret);
130 goto out;
131 }
132out:
133 return ret;
134}
135static int post_recvs(struct cmatest_node *node)
136{
137 struct ibv_recv_wr recv_wr, *recv_failure;
138 struct ibv_sge sge;
139 int i, ret = 0;
140 if (!message_count)
141 return 0;
142 recv_wr.next = NULL;
143 recv_wr.sg_list = &sge;
144 recv_wr.num_sge = 1;
145 recv_wr.wr_id = (uintptr_t)node;
146 sge.length = message_size + sizeof(struct ibv_grh);
147 sge.lkey = node->mr->lkey;
148 sge.addr = (uintptr_t)node->mem;
149 for (i = 0; i < message_count && !ret; i++)
150 {
151 ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
152 if (ret)
153 {
154 printf("failed to post receives: %d\n", ret);
155 break;
156 }
157 }
158 return ret;
159}
160static int post_sends(struct cmatest_node *node, int signal_flag)
161{
162 struct ibv_send_wr send_wr, *bad_send_wr;
163 struct ibv_sge sge;
164 int i, ret = 0;
165 if (!node->connected || !message_count)
166 return 0;
167 send_wr.next = NULL;
168 send_wr.sg_list = &sge;
169 send_wr.num_sge = 1;
170 send_wr.opcode = IBV_WR_SEND_WITH_IMM;
171 send_wr.send_flags = signal_flag;
172 send_wr.wr_id = (unsigned long)node;
173 send_wr.imm_data = htonl(node->cma_id->qp->qp_num);
174 send_wr.wr.ud.ah = node->ah;
175 send_wr.wr.ud.remote_qpn = node->remote_qpn;
176 send_wr.wr.ud.remote_qkey = node->remote_qkey;
177 sge.length = message_size;
178 sge.lkey = node->mr->lkey;
179 sge.addr = (uintptr_t)node->mem;
180 for (i = 0; i < message_count && !ret; i++)
181 {
182 ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
183 if (ret)
184 printf("failed to post sends: %d\n", ret);
185 }
186 return ret;
187}
188static void connect_error(void)
189{
190 test.connects_left--;
191}
192static int addr_handler(struct cmatest_node *node)
193{
194 int ret;
195 ret = verify_test_params(node);
196 if (ret)
197 goto err;
198 ret = init_node(node);
199 if (ret)
200 goto err;
201 if (!is_sender)
202 {
203 ret = post_recvs(node);
204 if (ret)
205 goto err;
206 }
207 ret = rdma_join_multicast(node->cma_id, test.dst_addr, node);
208 if (ret)
209 {
210 printf("mckey: failure joining: %d\n", ret);
211 goto err;
212 }
213 return 0;
214err:
215 connect_error();
216 return ret;
217}
218static int join_handler(struct cmatest_node *node, struct rdma_ud_param *param)
219{
220 char buf[40];
221 inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40);
222 printf("mckey: joined dgid: %s\n", buf);
223 node->remote_qpn = param->qp_num;
224 node->remote_qkey = param->qkey;
225 node->ah = ibv_create_ah(node->pd, ¶m->ah_attr);
226 if (!node->ah)
227 {
228 printf("mckey: failure creating address handle\n");
229 goto err;
230 }
231 node->connected = 1;
232 test.connects_left--;
233 return 0;
234err:
235 connect_error();
236 return -1;
237}
238static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
239{
240 int ret = 0;
241 switch (event->event)
242 {
243 case RDMA_CM_EVENT_ADDR_RESOLVED:
244 ret = addr_handler(cma_id->context);
245 break;
246 case RDMA_CM_EVENT_MULTICAST_JOIN:
247 ret = join_handler(cma_id->context, &event->param.ud);
248 break;
249 case RDMA_CM_EVENT_ADDR_ERROR:
250 case RDMA_CM_EVENT_ROUTE_ERROR:
251 case RDMA_CM_EVENT_MULTICAST_ERROR:
252 printf("mckey: event: %s, error: %d\n", rdma_event_str(event->event), event->status);
253 connect_error();
254 ret = event->status;
255 break;
256 case RDMA_CM_EVENT_DEVICE_REMOVAL:
257 /* Cleanup will occur after test completes. */
258 break;
259 default:
260 break;
261 }
262 return ret;
263}
264static void destroy_node(struct cmatest_node *node)
265{
266 if (!node->cma_id)
267 return;
268 if (node->ah)
269 ibv_destroy_ah(node->ah);
270 if (node->cma_id->qp)
271 rdma_destroy_qp(node->cma_id);
272 if (node->cq)
273 ibv_destroy_cq(node->cq);
274 if (node->mem)
275 {
276 ibv_dereg_mr(node->mr);
277 free(node->mem);
278 }
279 if (node->pd)
280 ibv_dealloc_pd(node->pd);
281 /* Destroy the RDMA ID after all device resources */
282 rdma_destroy_id(node->cma_id);
283}
284static int alloc_nodes(void)
285{
286 int ret, i;
287 test.nodes = malloc(sizeof *test.nodes * connections);
288 if (!test.nodes)
289 {
290 printf("mckey: unable to allocate memory for test nodes\n");
291 return -ENOMEM;
292 }
293 memset(test.nodes, 0, sizeof *test.nodes * connections);
294 for (i = 0; i < connections; i++)
295 {
296 test.nodes[i].id = i;
297 ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, &test.nodes[i], port_space);
298 if (ret)
299 goto err;
300 }
301 return 0;
302err:
303 while (--i >= 0)
304 rdma_destroy_id(test.nodes[i].cma_id);
305 free(test.nodes);
306 return ret;
307}
308static void destroy_nodes(void)
309{
310 int i;
311 for (i = 0; i < connections; i++)
312 destroy_node(&test.nodes[i]);
313 free(test.nodes);
314}
315static int poll_cqs(void)
316{
317 struct ibv_wc wc[8];
318 int done, i, ret;
319 for (i = 0; i < connections; i++)
320 {
321 if (!test.nodes[i].connected)
322 continue;
323 for (done = 0; done < message_count; done += ret)
324 {
325 ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
326 if (ret < 0)
327 {
328 printf("mckey: failed polling CQ: %d\n", ret);
329 return ret;
330 }
331 }
332 }
333 return 0;
334}
335static int connect_events(void)
336{
337 struct rdma_cm_event *event;
338 int ret = 0;
339 while (test.connects_left && !ret)
340 {
341 ret = rdma_get_cm_event(test.channel, &event);
342 if (!ret)
343 {
344 ret = cma_handler(event->id, event);
345 rdma_ack_cm_event(event);
346 }
347 }
348 return ret;
349}
/*
 * Resolve a host name or address string and copy the first result into
 * *addr.  The caller must supply storage large enough for the resolved
 * address (the callers in this file pass a struct sockaddr_in6).
 *
 * Returns 0 on success or the non-zero getaddrinfo() error code.
 */
static int get_addr(char *dst, struct sockaddr *addr)
{
	struct addrinfo *info = NULL;
	int rc = getaddrinfo(dst, NULL, NULL, &info);

	if (rc != 0) {
		printf("getaddrinfo failed - invalid hostname or IP address\n");
		return rc;
	}

	memcpy(addr, info->ai_addr, info->ai_addrlen);
	freeaddrinfo(info);
	return 0;
}
364static int run(void)
365{
366 int i, ret;
367 printf("mckey: starting %s\n", is_sender ? "client" : "server");
368 if (src_addr)
369 {
370 ret = get_addr(src_addr, (struct sockaddr *)&test.src_in);
371 if (ret)
372 return ret;
373 }
374 ret = get_addr(dst_addr, (struct sockaddr *)&test.dst_in);
375 if (ret)
376 return ret;
377 printf("mckey: joining\n");
378 for (i = 0; i < connections; i++)
379 {
380 if (src_addr)
381 {
382 ret = rdma_bind_addr(test.nodes[i].cma_id, test.src_addr);
383 if (ret)
384 {
385 printf("mckey: addr bind failure: %d\n", ret);
386 connect_error();
387 return ret;
388 }
389 }
390 if (unmapped_addr)
391 ret = addr_handler(&test.nodes[i]);
392 else
393 ret = rdma_resolve_addr(test.nodes[i].cma_id, test.src_addr, test.dst_addr,
394 2000);
395 if (ret)
396 {
397 printf("mckey: resolve addr failure: %d\n", ret);
398 connect_error();
399 return ret;
400 }
401 }
402 ret = connect_events();
403 if (ret)
404 goto out;
405 /*
406 * Pause to give SM chance to configure switches. We don't want to
407 * handle reliability issue in this simple test program.
408 */
409 sleep(3);
410 if (message_count)
411 {
412 if (is_sender)
413 {
414 printf("initiating data transfers\n");
415 for (i = 0; i < connections; i++)
416 {
417 ret = post_sends(&test.nodes[i], 0);
418 if (ret)
419 goto out;
420 }
421 }
422 else
423 {
424 printf("receiving data transfers\n");
425 ret = poll_cqs();
426 if (ret)
427 goto out;
428 }
429 printf("data transfers complete\n");
430 }
431out:
432 for (i = 0; i < connections; i++)
433 {
434 ret = rdma_leave_multicast(test.nodes[i].cma_id, test.dst_addr);
435 if (ret)
436 printf("mckey: failure leaving: %d\n", ret);
437 }
438 return ret;
439}
440int main(int argc, char **argv)
441{
442 int op, ret;
443 while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1)
444 {
445 switch (op)
446 {
447 case 'm':
448 dst_addr = optarg;
449 break;
450 case 'M':
451 unmapped_addr = 1;
452 dst_addr = optarg;
453 break;
454 case 's':
455 is_sender = 1;
456 break;
457 case 'b':
458 src_addr = optarg;
459 test.src_addr = (struct sockaddr *)&test.src_in;
460 break;
461 case 'c':
462 connections = atoi(optarg);
463 break;
464 case 'C':
465 message_count = atoi(optarg);
466 break;
467 case 'S':
468 message_size = atoi(optarg);
469 break;
470 case 'p':
471 port_space = strtol(optarg, NULL, 0);
472 break;
473 default:
474 printf("usage: %s\n", argv[0]);
475 printf("\t-m multicast_address\n");
476 printf("\t[-M unmapped_multicast_address]\n"
477 "\t replaces -m and requires -b\n");
478 printf("\t[-s(ender)]\n");
479 printf("\t[-b bind_address]\n");
480 printf("\t[-c connections]\n");
481 printf("\t[-C message_count]\n");
482 printf("\t[-S message_size]\n");
483 printf("\t[-p port_space - %#x for UDP (default), %#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB);
484 exit(1);
485 }
486 }
487 test.dst_addr = (struct sockaddr *)&test.dst_in;
488 test.connects_left = connections;
489 test.channel = rdma_create_event_channel();
490 if (!test.channel)
491 {
492 printf("failed to create event channel\n");
493 exit(1);
494 }
495 if (alloc_nodes())
496 exit(1);
497 ret = run();
498 printf("test complete\n");
499 destroy_nodes();
500 rdma_destroy_event_channel(test.channel);
501 printf("return status %d\n", ret);
502 return ret;
503}