以下的分析基于libvirt 3.0版本。
libvirt是一套免费、开源的虚拟化管理工具,支持Linux下的主流虚拟化技术。目前有大量的应用程序构建在libvirt之上,很多虚拟化产品的开发都是灵活调用libvirt的API接口去实现的。对于应用程序,libvirt提供一套非阻塞调用的框架。
涉及相关的API:
virInitialize:初始化libvirt库,主要针对多线程编程
virEventRegisterDefaultImpl:基于poll系统调用注册默认事件实现,这是一个通用实现。
virEventRunDefaultImpl:运行一次事件循环迭代,需要在单独的线程中循环调用该函数
virConnectOpen:连接libvirt服务端,即libvirtd
virConnectSetKeepAlive:设置保活周期,此函数控制客户端发送keepalive消息。
相关的实现代码如下:
/* Initialize the libvirt library; bail out with exit code 1 on failure. */
if (virInitialize() < 0) {
    printf("virInitialize failed: cannot initialize libvirt\n");
    return 1;
}
/* Register libvirt's default event implementation (required);
 * bail out with exit code 2 on failure. */
if (virEventRegisterDefaultImpl() < 0) {
    printf("failed to register default event implementation\n");
    return 2;
}
/*
 * Thread entry point that dispatches libvirt events (required).
 *
 * Repeatedly calls virEventRunDefaultImpl() until the flag `isexit`
 * becomes non-zero, reporting any iteration that fails.
 *
 * data: unused.
 * Returns a pointer to a static message string as the thread's exit value.
 */
void *libvirt_thread_cb(void *data)
{
    (void)data;

    /* NOTE(review): `isexit` is declared elsewhere; presumably it is set by
     * another thread -- confirm it is atomic or otherwise synchronized. */
    while (!isexit) {
        /* Do not ignore failures silently: report each failed iteration. */
        if (virEventRunDefaultImpl() < 0) {
            printf("failed to run libvirt event loop iteration\n");
        }
    }

    /* Returning from the start routine terminates the thread with this
     * exit value, exactly as pthread_exit() would. */
    return (void *)"libvirt pthread will exit!!!";
}
//连接libvirt服务端,并且设计保活机制
contor =virConnectOpen(NULL);//参数为空,表示连接本地的libvirtd服务端
if (contor) {
//第二个参数,心跳发送周期; 第三个参数,心跳参数,当超过该次数时,连接断开
if (virConnectSetKeepAlive(contor, 10, 6)< 0) {
printf("failed to set connkeep alive config\n");
virConnectClose(contor);
contor = NULL;
}
}
通过以上的实现,后续就可以非阻塞调用libvirt其它的API接口,当libvirtd阻塞时,能够超时返回。
对于libvirtd端相关的心跳周期保存在libvirtd.conf文件中,可以修改参数,然后再重启libvirtd即可生效:
keepalive_interval = 10 # 心跳间隔,单位为秒,默认值为5
keepalive_count = 6 # 允许连续无应答的心跳次数,默认值为5
相关的源码分析:
virEventRegisterDefaultImpl函数分析,源码如下:
/*
 * Register the default (poll-based) event implementation.
 *
 * Resets the last error, initializes the poll event loop with
 * virEventPollInit(), then installs the virEventPoll* handle/timeout
 * callbacks through virEventRegisterImpl().
 *
 * Returns 0 on success; -1 if virEventPollInit() fails (the error is
 * dispatched via virDispatchError() before returning).
 */
int virEventRegisterDefaultImpl(void)
{
    VIR_DEBUG("registering default event implementation");

    virResetLastError();

    if (virEventPollInit() < 0) {
        virDispatchError(NULL);
        return -1;
    }

    virEventRegisterImpl(
        virEventPollAddHandle,
        virEventPollUpdateHandle,
        virEventPollRemoveHandle,
        virEventPollAddTimeout,
        virEventPollUpdateTimeout,
        virEventPollRemoveTimeout
        );

    return 0;
}
通过源码分析,调用virEventRegisterImpl函数对全局函数赋值,为什么会这样做,下一步再分析
virConnectSetKeepAlive函数分析:
通过源码分析,最终会调用virKeepAliveStart函数去启用定时器,发送keepalive消息。
virKeepAliveStart(virKeepAlivePtr ka,
int interval, unsigned int count){ int ret = -1; time_t delay; int timeout; time_t now; virObjectLock(ka); if (ka->timer >= 0) { //如果定时器存在,不做处理 VIR_DEBUG("Keepalive messages already enabled"); ret = 0; goto cleanup; } if (interval > 0) { if (ka->interval > 0) { //心跳周期已设置,不在设置 virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("keepalive interval already set")); goto cleanup; } /* Guard against overflow */ if (interval > INT_MAX / 1000) { virReportError(VIR_ERR_INTERNAL_ERROR, _("keepalive interval %d too large"), interval); goto cleanup; } ka->interval = interval; ka->count = count; ka->countToDeath = count; } if (ka->interval <= 0) { //心跳周期小于0,禁用keepalive VIR_DEBUG("Keepalive messages disabled by configuration"); ret = 0; goto cleanup; } PROBE(RPC_KEEPALIVE_START, "ka=%p client=%p interval=%d count=%u", ka, ka->client, interval, count); now = time(NULL); delay = now - ka->lastPacketReceived; if (delay > ka->interval) timeout = 0; else timeout = ka->interval - delay; ka->intervalStart = now - (ka->interval - timeout); ka->timer = virEventAddTimeout(timeout * 1000, virKeepAliveTimer, ka, virObjectFreeCallback);//创建心跳定时器 if (ka->timer < 0) goto cleanup; /* the timer now has another reference to this object */ virObjectRef(ka); ret = 0; cleanup: virObjectUnlock(ka); return ret;}继续分析virEventAddTimeout函数,源码如下:intvirEventAddTimeout(int timeout, virEventTimeoutCallback cb, void *opaque, virFreeCallback ff){ if (!addTimeoutImpl) return -1; return addTimeoutImpl(timeout, cb, opaque, ff);}
该函数实现很简单,只是调用全局的函数指针addTimeoutImpl去设置定时器。addTimeoutImpl是全局的函数接口,它是在应用程序调用virEventRegisterDefaultImpl函数时被赋值的;如果没有注册,addTimeoutImpl为空,virEventAddTimeout直接返回-1,心跳定时器就无法创建。这就是必须调用virEventRegisterDefaultImpl函数的原因。
如果没有进行以上的设置,为什么会阻塞?阻塞在哪个地方?通过gdb调试,发现阻塞在virNetClientIOEventLoop函数中的poll系统调用上。堆栈如下:
(gdb) bt
#0 virNetClientIOEventLoop (client=0x2026d30, thiscall=0x1dc57a0) at ../../../src/rpc/virnetclient.c:1595
#1 0x00007fd5dc8368a3 in virNetClientIO (client=0x2026d30, thiscall=0x1dc57a0) at ../../../src/rpc/virnetclient.c:1950
#2 0x00007fd5dc8370b7 in virNetClientSendInternal (client=0x2026d30, msg=0x2026c60, expectReply=true, nonBlock=false) at ../../../src/rpc/virnetclient.c:2122
#3 0x00007fd5dc837141 in virNetClientSendWithReply (client=0x2026d30, msg=0x2026c60) at ../../../src/rpc/virnetclient.c:2150
#4 0x00007fd5dc838048 in virNetClientProgramCall (prog=0x2027150, client=0x2026d30, serial=8, proc=212, noutfds=0, outfds=0x0, ninfds=0x0, infds=0x0, args_filter=0x7fd5dc82ce9f <xdr_remote_domain_get_state_args>, args=0x7fffd92ffba0, ret_filter=0x7fd5dc82cf19 <xdr_remote_domain_get_state_ret>, ret=0x7fffd92ffb80) at ../../../src/rpc/virnetclientprogram.c:329
#5 0x00007fd5dc819442 in callFull (conn=0x1a24380, priv=0x1a77660, flags=0, fdin=0x0, fdinlen=0, fdout=0x0, fdoutlen=0x0, proc_nr=212, args_filter=0x7fd5dc82ce9f <xdr_remote_domain_get_state_args>, args=0x7fffd92ffba0 "0T\002\002", ret_filter=0x7fd5dc82cf19 <xdr_remote_domain_get_state_ret>, ret=0x7fffd92ffb80 "") at ../../../src/remote/remote_driver.c:6637
#6 0x00007fd5dc819515 in call (conn=0x1a24380, priv=0x1a77660, flags=0, proc_nr=212, args_filter=0x7fd5dc82ce9f <xdr_remote_domain_get_state_args>, args=0x7fffd92ffba0 "0T\002\002", ret_filter=0x7fd5dc82cf19 <xdr_remote_domain_get_state_ret>, ret=0x7fffd92ffb80 "") at ../../../src/remote/remote_driver.c:6659
#7 0x00007fd5dc7fd77b in remoteDomainGetState (domain=0x1dc8f60, state=0x7fffd92ffcdc, reason=0x0, flags=0) at ../../../src/remote/remote_driver.c:2458
#8 0x00007fd5dc7b5b2f in virDomainGetState (domain=0x1dc8f60, state=0x7fffd92ffcdc, reason=0x0, flags=0) at ../../../src/libvirt-domain.c:2495
virNetClientIOEventLoop函数分析:
static int virNetClientIOEventLoop(virNetClientPtr client,
virNetClientCallPtr thiscall){ struct pollfd fds[2]; int ret; fds[0].fd = virNetSocketGetFD(client->sock); fds[1].fd = client->wakeupReadFD; for (;;) { /* If we are non-blocking, then we don't want to sleep in poll() */ if (thiscall->nonBlock) timeout = 0; /* Limit timeout so that we can send keepalive request in time */ if (timeout == -1) timeout = virKeepAliveTimeout(client->keepalive);//返回-1,导致poll阻塞 fds[0].events = fds[0].revents = 0; fds[1].events = fds[1].revents = 0; fds[1].events = POLLIN; /* Calculate poll events for calls */ virNetClientCallMatchPredicate(client->waitDispatch, virNetClientIOEventLoopPollEvents, &fds[0]); if (client->nstreams) fds[0].events |= POLLIN; repoll: ret = poll(fds, ARRAY_CARDINALITY(fds), timeout); if (ret < 0 && (errno == EAGAIN || errno == EINTR)) goto repoll;}
通过源码可以分析,由于应用程序没有调用virConnectSetKeepAlive函数设置心跳保活机制,导致client->keepalive为NULL,分析virKeepAliveTimeout函数可知,当client->keepalive为NULL时,直接返回为-1;导致poll系统调用一直阻塞,直到有事件响应。
总结:通过以上设置,在调用virConnectOpen函数连接libvirtd之后,就可以实现非阻塞调用libvirt其它的API;当libvirtd阻塞时,不会导致调用者一直阻塞,而是能够超时返回。
当libvirt主线程阻塞时,上述的设置并不能解决virConnectOpen阻塞的问题,需要修改libvirt相关的代码。至于为什么,自己去思考,怎么解决这个问题?