00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef RDMA_WRAP_H
00022 #define RDMA_WRAP_H
00023
00024 #include "rdma_factories.h"
00025
00026 #include <rdma/rdma_cma.h>
00027
00028 #include "qpid/RefCounted.h"
00029 #include "qpid/sys/IOHandle.h"
00030 #include "qpid/sys/posix/PrivatePosix.h"
00031
00032 #include <fcntl.h>
00033
00034 #include <vector>
00035 #include <algorithm>
00036 #include <iostream>
00037 #include <stdexcept>
00038 #include <boost/shared_ptr.hpp>
00039 #include <boost/intrusive_ptr.hpp>
00040
00041 namespace Rdma {
00042 const int DEFAULT_TIMEOUT = 2000;
00043 const int DEFAULT_BACKLOG = 100;
00044 const int DEFAULT_CQ_ENTRIES = 256;
00045 const int DEFAULT_WR_ENTRIES = 64;
00046 const ::rdma_conn_param DEFAULT_CONNECT_PARAM = {
00047 0,
00048 0,
00049 4,
00050 4,
00051 0,
00052 5,
00053 7
00054 };
00055
00056 struct Buffer {
00057 friend class QueuePair;
00058
00059 char* const bytes;
00060 const int32_t byteCount;
00061 int32_t dataStart;
00062 int32_t dataCount;
00063
00064 Buffer(::ibv_pd* pd, char* const b, const int32_t s) :
00065 bytes(b),
00066 byteCount(s),
00067 dataStart(0),
00068 dataCount(0),
00069 mr(CHECK_NULL(::ibv_reg_mr(
00070 pd, bytes, byteCount,
00071 ::IBV_ACCESS_LOCAL_WRITE)))
00072 {}
00073
00074 ~Buffer() {
00075 (void) ::ibv_dereg_mr(mr);
00076 delete [] bytes;
00077 }
00078
00079 private:
00080 ::ibv_mr* mr;
00081 };
00082
// Defined further down; needed here only by name.
class Connection;

// Tells which completion queue a QueuePairEvent was taken from.
// NONE marks an empty event (nothing was available).
enum QueueDirection {
    NONE = 0,
    SEND = 1,
    RECV = 2
};
00090
00091 class QueuePairEvent {
00092 boost::shared_ptr< ::ibv_cq > cq;
00093 ::ibv_wc wc;
00094 QueueDirection dir;
00095
00096 friend class QueuePair;
00097
00098 QueuePairEvent() :
00099 dir(NONE)
00100 {}
00101
00102 QueuePairEvent(
00103 const ::ibv_wc& w,
00104 boost::shared_ptr< ::ibv_cq > c,
00105 QueueDirection d) :
00106 cq(c),
00107 wc(w),
00108 dir(d)
00109 {
00110 assert(dir != NONE);
00111 }
00112
00113 public:
00114 operator bool() const {
00115 return dir != NONE;
00116 }
00117
00118 QueueDirection getDirection() const {
00119 return dir;
00120 }
00121
00122 ::ibv_wc_opcode getEventType() const {
00123 return wc.opcode;
00124 }
00125
00126 ::ibv_wc_status getEventStatus() const {
00127 return wc.status;
00128 }
00129
00130 Buffer* getBuffer() const {
00131 Buffer* b = reinterpret_cast<Buffer*>(wc.wr_id);
00132 b->dataCount = wc.byte_len;
00133 return b;
00134 }
00135 };
00136
00137
00138
00139
00140
00141
00142
00143 class QueuePair : public qpid::sys::IOHandle, public qpid::RefCounted {
00144 boost::shared_ptr< ::ibv_pd > pd;
00145 boost::shared_ptr< ::ibv_comp_channel > cchannel;
00146 boost::shared_ptr< ::ibv_cq > scq;
00147 boost::shared_ptr< ::ibv_cq > rcq;
00148 boost::shared_ptr< ::rdma_cm_id > id;
00149 int outstandingSendEvents;
00150 int outstandingRecvEvents;
00151
00152 friend class Connection;
00153
00154 QueuePair(boost::shared_ptr< ::rdma_cm_id > id);
00155 ~QueuePair();
00156
00157 public:
00158 typedef boost::intrusive_ptr<QueuePair> intrusive_ptr;
00159
00160
00161 Buffer* createBuffer(int s) {
00162 return new Buffer(pd.get(), new char[s], s);
00163 }
00164
00165
00166
00167 void nonblocking() {
00168 ::fcntl(cchannel->fd, F_SETFL, O_NONBLOCK);
00169 }
00170
00171
00172
00173 QueuePair::intrusive_ptr getNextChannelEvent() {
00174
00175 ::ibv_cq* cq;
00176 void* ctx;
00177 int rc = ::ibv_get_cq_event(cchannel.get(), &cq, &ctx);
00178 if (rc == -1 && errno == EAGAIN)
00179 return 0;
00180 CHECK(rc);
00181
00182
00183 if (cq == scq.get()) {
00184 if (++outstandingSendEvents > DEFAULT_CQ_ENTRIES / 2) {
00185 ::ibv_ack_cq_events(cq, outstandingSendEvents);
00186 outstandingSendEvents = 0;
00187 }
00188 } else if (cq == rcq.get()) {
00189 if (++outstandingRecvEvents > DEFAULT_CQ_ENTRIES / 2) {
00190 ::ibv_ack_cq_events(cq, outstandingRecvEvents);
00191 outstandingRecvEvents = 0;
00192 }
00193 }
00194
00195 return static_cast<QueuePair*>(ctx);
00196 }
00197
00198 QueuePairEvent getNextEvent() {
00199 ::ibv_wc w;
00200 if (::ibv_poll_cq(scq.get(), 1, &w) == 1)
00201 return QueuePairEvent(w, scq, SEND);
00202 else if (::ibv_poll_cq(rcq.get(), 1, &w) == 1)
00203 return QueuePairEvent(w, rcq, RECV);
00204 else
00205 return QueuePairEvent();
00206 }
00207
00208 void postRecv(Buffer* buf);
00209 void postSend(Buffer* buf);
00210 void notifyRecv();
00211 void notifySend();
00212 };
00213
00214 class ConnectionEvent {
00215 friend class Connection;
00216
00217
00218
00219 boost::intrusive_ptr<Connection> id;
00220 boost::intrusive_ptr<Connection> listen_id;
00221 boost::shared_ptr< ::rdma_cm_event > event;
00222
00223 ConnectionEvent() {}
00224 ConnectionEvent(::rdma_cm_event* e);
00225
00226
00227 public:
00228 operator bool() const {
00229 return event;
00230 }
00231
00232 ::rdma_cm_event_type getEventType() const {
00233 return event->event;
00234 }
00235
00236 ::rdma_conn_param getConnectionParam() const {
00237 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
00238 return event->param.conn;
00239 } else {
00240 ::rdma_conn_param p = {};
00241 return p;
00242 }
00243 }
00244
00245 boost::intrusive_ptr<Connection> getConnection () const {
00246 return id;
00247 }
00248
00249 boost::intrusive_ptr<Connection> getListenId() const {
00250 return listen_id;
00251 }
00252 };
00253
00254
00255
00256
00257
00258
00259
00260 class Connection : public qpid::sys::IOHandle, public qpid::RefCounted {
00261 boost::shared_ptr< ::rdma_event_channel > channel;
00262 boost::shared_ptr< ::rdma_cm_id > id;
00263 QueuePair::intrusive_ptr qp;
00264
00265 void* context;
00266
00267 friend class ConnectionEvent;
00268 friend class QueuePair;
00269
00270
00271
00272 Connection(::rdma_cm_id* i) :
00273 qpid::sys::IOHandle(new qpid::sys::IOHandlePrivate),
00274 id(i, destroyId),
00275 context(0)
00276 {
00277 impl->fd = id->channel->fd;
00278
00279
00280
00281 if (i)
00282 i->context = this;
00283 }
00284
00285 Connection() :
00286 qpid::sys::IOHandle(new qpid::sys::IOHandlePrivate),
00287 channel(mkEChannel()),
00288 id(mkId(channel.get(), this, RDMA_PS_TCP)),
00289 context(0)
00290 {
00291 impl->fd = channel->fd;
00292 }
00293
00294
00295
00296 void ensureQueuePair() {
00297 assert(id.get());
00298
00299
00300 if (qp)
00301 return;
00302
00303 qp = new QueuePair(id);
00304 }
00305
00306 public:
00307 typedef boost::intrusive_ptr<Connection> intrusive_ptr;
00308
00309 static intrusive_ptr make() {
00310 return new Connection();
00311 }
00312
00313 static intrusive_ptr find(::rdma_cm_id* i) {
00314 if (!i)
00315 return 0;
00316 Connection* id = static_cast< Connection* >(i->context);
00317 if (!id)
00318 throw std::logic_error("Couldn't find existing Connection");
00319 return id;
00320 }
00321
00322 template <typename T>
00323 void addContext(T* c) {
00324
00325 if (!context)
00326 context = c;
00327 }
00328
00329 template <typename T>
00330 T* getContext() {
00331 return static_cast<T*>(context);
00332 }
00333
00334
00335
00336 void nonblocking() {
00337 assert(id.get());
00338 ::fcntl(id->channel->fd, F_SETFL, O_NONBLOCK);
00339 }
00340
00341
00342
00343 ConnectionEvent getNextEvent() {
00344 assert(id.get());
00345 ::rdma_cm_event* e;
00346 int rc = ::rdma_get_cm_event(id->channel, &e);
00347 if (rc == -1 && errno == EAGAIN)
00348 return ConnectionEvent();
00349 CHECK(rc);
00350 return ConnectionEvent(e);
00351 }
00352
00353 void bind(sockaddr& src_addr) const {
00354 assert(id.get());
00355 CHECK(::rdma_bind_addr(id.get(), &src_addr));
00356 }
00357
00358 void listen(int backlog = DEFAULT_BACKLOG) const {
00359 assert(id.get());
00360 CHECK(::rdma_listen(id.get(), backlog));
00361 }
00362
00363 void resolve_addr(
00364 sockaddr& dst_addr,
00365 sockaddr* src_addr = 0,
00366 int timeout_ms = DEFAULT_TIMEOUT) const
00367 {
00368 assert(id.get());
00369 CHECK(::rdma_resolve_addr(id.get(), src_addr, &dst_addr, timeout_ms));
00370 }
00371
00372 void resolve_route(int timeout_ms = DEFAULT_TIMEOUT) const {
00373 assert(id.get());
00374 CHECK(::rdma_resolve_route(id.get(), timeout_ms));
00375 }
00376
00377 void disconnect() const {
00378 assert(id.get());
00379 CHECK(::rdma_disconnect(id.get()));
00380 }
00381
00382
00383 void connect() {
00384 assert(id.get());
00385
00386
00387 ensureQueuePair();
00388
00389 ::rdma_conn_param p = DEFAULT_CONNECT_PARAM;
00390 CHECK(::rdma_connect(id.get(), &p));
00391 }
00392
00393 template <typename T>
00394 void connect(const T* data) {
00395 assert(id.get());
00396
00397 ensureQueuePair();
00398
00399 ::rdma_conn_param p = DEFAULT_CONNECT_PARAM;
00400 p.private_data = data;
00401 p.private_data_len = sizeof(T);
00402 CHECK(::rdma_connect(id.get(), &p));
00403 }
00404
00405
00406
00407 template <typename T>
00408 void accept(const ::rdma_conn_param& param, const T* data) {
00409 assert(id.get());
00410
00411 ensureQueuePair();
00412
00413 ::rdma_conn_param p = param;
00414 p.private_data = data;
00415 p.private_data_len = sizeof(T);
00416 CHECK(::rdma_accept(id.get(), &p));
00417 }
00418
00419 void accept(const ::rdma_conn_param& param) {
00420 assert(id.get());
00421
00422 ensureQueuePair();
00423
00424 ::rdma_conn_param p = param;
00425 p.private_data = 0;
00426 p.private_data_len = 0;
00427 CHECK(::rdma_accept(id.get(), &p));
00428 }
00429
00430 template <typename T>
00431 void reject(const T* data) const {
00432 assert(id.get());
00433 CHECK(::rdma_reject(id.get(), data, sizeof(T)));
00434 }
00435
00436 void reject() const {
00437 assert(id.get());
00438 CHECK(::rdma_reject(id.get(), 0, 0));
00439 }
00440
00441 QueuePair::intrusive_ptr getQueuePair() {
00442 assert(id.get());
00443
00444 ensureQueuePair();
00445
00446 return qp;
00447 }
00448 };
00449
00450 inline QueuePair::QueuePair(boost::shared_ptr< ::rdma_cm_id > i) :
00451 qpid::sys::IOHandle(new qpid::sys::IOHandlePrivate),
00452 pd(allocPd(i->verbs)),
00453 cchannel(mkCChannel(i->verbs)),
00454 scq(mkCq(i->verbs, DEFAULT_CQ_ENTRIES, 0, cchannel.get())),
00455 rcq(mkCq(i->verbs, DEFAULT_CQ_ENTRIES, 0, cchannel.get())),
00456 id(i),
00457 outstandingSendEvents(0),
00458 outstandingRecvEvents(0)
00459 {
00460 impl->fd = cchannel->fd;
00461
00462
00463
00464 scq->cq_context = this;
00465 rcq->cq_context = this;
00466
00467 ::ibv_qp_init_attr qp_attr = {};
00468
00469
00470 qp_attr.cap.max_send_wr = DEFAULT_WR_ENTRIES;
00471 qp_attr.cap.max_send_sge = 4;
00472 qp_attr.cap.max_recv_wr = DEFAULT_WR_ENTRIES;
00473 qp_attr.cap.max_recv_sge = 4;
00474
00475 qp_attr.send_cq = scq.get();
00476 qp_attr.recv_cq = rcq.get();
00477 qp_attr.qp_type = IBV_QPT_RC;
00478
00479 CHECK(::rdma_create_qp(id.get(), pd.get(), &qp_attr));
00480
00481
00482 id->qp->qp_context = this;
00483 }
00484
00485 inline QueuePair::~QueuePair() {
00486 if (outstandingSendEvents > 0)
00487 ::ibv_ack_cq_events(scq.get(), outstandingSendEvents);
00488 if (outstandingRecvEvents > 0)
00489 ::ibv_ack_cq_events(rcq.get(), outstandingRecvEvents);
00490
00491 ::rdma_destroy_qp(id.get());
00492 }
00493
00494 inline void QueuePair::notifyRecv() {
00495 CHECK_IBV(ibv_req_notify_cq(rcq.get(), 0));
00496 }
00497
00498 inline void QueuePair::notifySend() {
00499 CHECK_IBV(ibv_req_notify_cq(scq.get(), 0));
00500 }
00501
00502 inline void QueuePair::postRecv(Buffer* buf) {
00503 ::ibv_recv_wr rwr = {};
00504 ::ibv_sge sge;
00505
00506 sge.addr = (uintptr_t) buf->bytes+buf->dataStart;
00507 sge.length = buf->dataCount;
00508 sge.lkey = buf->mr->lkey;
00509
00510 rwr.wr_id = reinterpret_cast<uint64_t>(buf);
00511 rwr.sg_list = &sge;
00512 rwr.num_sge = 1;
00513
00514 ::ibv_recv_wr* badrwr = 0;
00515 CHECK_IBV(::ibv_post_recv(id->qp, &rwr, &badrwr));
00516 if (badrwr)
00517 throw std::logic_error("ibv_post_recv(): Bad rwr");
00518 }
00519
00520 inline void QueuePair::postSend(Buffer* buf) {
00521 ::ibv_send_wr swr = {};
00522 ::ibv_sge sge;
00523
00524 sge.addr = (uintptr_t) buf->bytes+buf->dataStart;
00525 sge.length = buf->dataCount;
00526 sge.lkey = buf->mr->lkey;
00527
00528 swr.wr_id = reinterpret_cast<uint64_t>(buf);
00529 swr.opcode = IBV_WR_SEND;
00530 swr.send_flags = IBV_SEND_SIGNALED;
00531 swr.sg_list = &sge;
00532 swr.num_sge = 1;
00533
00534 ::ibv_send_wr* badswr = 0;
00535 CHECK_IBV(::ibv_post_send(id->qp, &swr, &badswr));
00536 if (badswr)
00537 throw std::logic_error("ibv_post_send(): Bad swr");
00538 }
00539
00540 inline ConnectionEvent::ConnectionEvent(::rdma_cm_event* e) :
00541 id((e->event != RDMA_CM_EVENT_CONNECT_REQUEST) ?
00542 Connection::find(e->id) : new Connection(e->id)),
00543 listen_id(Connection::find(e->listen_id)),
00544 event(e, acker)
00545 {}
00546 }
00547
00548 inline std::ostream& operator<<(std::ostream& o, ::rdma_cm_event_type t) {
00549 # define CHECK_TYPE(t) case t: o << #t; break;
00550 switch(t) {
00551 CHECK_TYPE(RDMA_CM_EVENT_ADDR_RESOLVED)
00552 CHECK_TYPE(RDMA_CM_EVENT_ADDR_ERROR)
00553 CHECK_TYPE(RDMA_CM_EVENT_ROUTE_RESOLVED)
00554 CHECK_TYPE(RDMA_CM_EVENT_ROUTE_ERROR)
00555 CHECK_TYPE(RDMA_CM_EVENT_CONNECT_REQUEST)
00556 CHECK_TYPE(RDMA_CM_EVENT_CONNECT_RESPONSE)
00557 CHECK_TYPE(RDMA_CM_EVENT_CONNECT_ERROR)
00558 CHECK_TYPE(RDMA_CM_EVENT_UNREACHABLE)
00559 CHECK_TYPE(RDMA_CM_EVENT_REJECTED)
00560 CHECK_TYPE(RDMA_CM_EVENT_ESTABLISHED)
00561 CHECK_TYPE(RDMA_CM_EVENT_DISCONNECTED)
00562 CHECK_TYPE(RDMA_CM_EVENT_DEVICE_REMOVAL)
00563 CHECK_TYPE(RDMA_CM_EVENT_MULTICAST_JOIN)
00564 CHECK_TYPE(RDMA_CM_EVENT_MULTICAST_ERROR)
00565 }
00566 # undef CHECK_TYPE
00567 return o;
00568 }
00569
00570 #endif // RDMA_WRAP_H