VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFile.cpp@26671

Last change on this file since 26671 was 26671, checked in by vboxsync, 15 years ago

AsyncCompletion: Make it possible to limit the bandwidth of a VM

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.9 KB
1/* $Id: PDMAsyncCompletionFile.cpp 26671 2010-02-22 07:21:34Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronously in R3 using EMT.
4 */
5
6/*
7 * Copyright (C) 2006-2009 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
27#define RT_STRICT
28//#define DEBUG
29#include "PDMInternal.h"
30#include <VBox/pdm.h>
31#include <VBox/mm.h>
32#include <VBox/vm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35
36#include <iprt/asm.h>
37#include <iprt/assert.h>
38#include <iprt/critsect.h>
39#include <iprt/env.h>
40#include <iprt/file.h>
41#include <iprt/mem.h>
42#include <iprt/semaphore.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/path.h>
46
47#include "PDMAsyncCompletionFileInternal.h"
48
49/**
50 * Frees a task.
51 *
52 * @returns nothing.
53 * @param pEndpoint Pointer to the endpoint the task was for.
54 * @param pTask The task to free.
55 */
56void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
57 PPDMACTASKFILE pTask)
58{
59 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
60
61 LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask));
62
63 /* Try the per endpoint cache first. */
64 if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax)
65 {
66 /* Add it to the list. */
67 pEndpoint->pTasksFreeTail->pNext = pTask;
68 pEndpoint->pTasksFreeTail = pTask;
69 ASMAtomicIncU32(&pEndpoint->cTasksCached);
70 }
71 else if (false)
72 {
73 /* Bigger class cache */
74 }
75 else
76 {
77 Log(("Freeing task %p because all caches are full\n", pTask));
78 MMR3HeapFree(pTask);
79 }
80}
81
82/**
83 * Allocates a task segment.
84 *
85 * @returns Pointer to the new task segment or NULL
86 * @param pEndpoint Pointer to the endpoint
87 */
88PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
89{
90 PPDMACTASKFILE pTask = NULL;
91
92 /* Try the small per endpoint cache first. */
93 if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail)
94 {
95 /* Try the bigger endpoint class cache. */
96 PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
97
98#if 0
99 /* We start with the assigned slot id to distribute the load when allocating new tasks. */
100 unsigned iSlot = pEndpoint->iSlotStart;
101 do
102 {
103 pTask = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
104 if (pTask)
105 break;
106
107 iSlot = (iSlot + 1) % RT_ELEMENTS(pEndpointClass->apTaskCache);
108 } while (iSlot != pEndpoint->iSlotStart);
109#endif
110 if (!pTask)
111 {
112 /*
113 * Allocate completely new.
114 * If this fails we return NULL.
115 */
116 int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
117 sizeof(PDMACTASKFILE),
118 (void **)&pTask);
119 if (RT_FAILURE(rc))
120 pTask = NULL;
121
122 LogFlow(("Allocated task %p\n", pTask));
123 }
124#if 0
125 else
126 {
127 /* Remove the first element and put the rest into the slot again. */
128 PPDMASYNCCOMPLETIONTASK pTaskHeadNew = pTask->pNext;
129
130 pTaskHeadNew->pPrev = NULL;
131
132 /* Put back into the list adding any new tasks. */
133 while (true)
134 {
135 bool fChanged = ASMAtomicCmpXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], pTaskHeadNew, NULL);
136
137 if (fChanged)
138 break;
139
140 PPDMASYNCCOMPLETIONTASK pTaskHead = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
141
142 /* New tasks could have been added in between. */
143 if (pTaskHead)
144 {
145 /* Go to the end of the probably much shorter new list. */
146 PPDMASYNCCOMPLETIONTASK pTaskTail = pTaskHead;
147 while (pTaskTail->pNext)
148 pTaskTail = pTaskTail->pNext;
149
150 /* Concatenate */
151 pTaskTail->pNext = pTaskHeadNew;
152
153 pTaskHeadNew = pTaskHead;
154 }
155 /* Another round trying to change the list. */
156 }
157 /* We got a task from the global cache so decrement the counter */
158 ASMAtomicDecU32(&pEndpointClass->cTasksCached);
159 }
160#endif
161 }
162 else
163 {
164 /* Grab a free task from the head. */
165 AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
166
167 pTask = pEndpoint->pTasksFreeHead;
168 pEndpoint->pTasksFreeHead = pTask->pNext;
169 ASMAtomicDecU32(&pEndpoint->cTasksCached);
170 }
171
172 pTask->pNext = NULL;
173
174 return pTask;
175}
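/**
 * Usage sketch (illustrative; not part of the original source): how the
 * allocator above and pdmacFileTaskFree() are meant to pair up around a
 * request.  Only members that are assigned elsewhere in this file are touched;
 * the surrounding control flow is hypothetical.
 *
 * @code
 *     PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
 *     if (pIoTask)
 *     {
 *         pIoTask->pEndpoint       = pEpFile;
 *         pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
 *         pIoTask->pvUser          = pTaskFile;
 *         pIoTask->pfnCompleted    = pdmacFileEpTaskCompleted;
 *         pdmacFileEpAddTask(pEpFile, pIoTask);  // hand it to the I/O manager
 *         // The I/O manager presumably returns it to the cache with
 *         // pdmacFileTaskFree(pEpFile, pIoTask) once the request completed.
 *     }
 * @endcode
 */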
176
177PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
178{
179 PPDMACTASKFILE pTasks = NULL;
180
181 /*
182 * Get pending tasks.
183 */
184 pTasks = (PPDMACTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, NULL);
185
186 /* Reverse the list to process in FIFO order. */
187 if (pTasks)
188 {
189 PPDMACTASKFILE pTask = pTasks;
190
191 pTasks = NULL;
192
193 while (pTask)
194 {
195 PPDMACTASKFILE pCur = pTask;
196 pTask = pTask->pNext;
197 pCur->pNext = pTasks;
198 pTasks = pCur;
199 }
200 }
201
202 return pTasks;
203}
204
205static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr)
206{
207 bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true);
208
209 if (!fWokenUp)
210 {
211 int rc = VINF_SUCCESS;
212 bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem);
213
214 if (fWaitingEventSem)
215 rc = RTSemEventSignal(pAioMgr->EventSem);
216
217 AssertRC(rc);
218 }
219}
220
221static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
222{
223 int rc = VINF_SUCCESS;
224
225 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent);
226 Assert(!pAioMgr->fBlockingEventPending);
227 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true);
228
229 /* Wake up the async I/O manager. */
230 pdmacFileAioMgrWakeup(pAioMgr);
231
232 /* Wait for completion. */
233 rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);
234 AssertRC(rc);
235
236 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false);
237 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
238
239 return rc;
240}
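/**
 * Note (an inference; the consumer side lives in other source files): the
 * handshake above expects the I/O manager thread to notice
 * fBlockingEventPending after the wakeup, act on enmBlockingEvent together
 * with the matching BlockingEventData member, and then signal EventSemBlock so
 * the RTSemEventWait() call above returns.  CritSectBlockingEvent in the
 * callers below serialises concurrent blocking events.
 */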
241
242int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
243{
244 int rc;
245
246 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
247 AssertRCReturn(rc, rc);
248
249 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint);
250 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT);
251
252 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
253
254 if (RT_SUCCESS(rc))
255 ASMAtomicWritePtr((void * volatile *)&pEndpoint->pAioMgr, pAioMgr);
256
257 return rc;
258}
259
260static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
261{
262 int rc;
263
264 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
265 AssertRCReturn(rc, rc);
266
267 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint);
268 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT);
269
270 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
271
272 return rc;
273}
274
275static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
276{
277 int rc;
278
279 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
280 AssertRCReturn(rc, rc);
281
282 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint);
283 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT);
284
285 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
286
287 return rc;
288}
289
290static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr)
291{
292 int rc;
293
294 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
295 AssertRCReturn(rc, rc);
296
297 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN);
298
299 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
300
301 return rc;
302}
303
304int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
305{
306 PPDMACTASKFILE pNext;
307 do
308 {
309 pNext = pEndpoint->pTasksNewHead;
310 pTask->pNext = pNext;
311 } while (!ASMAtomicCmpXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, (void *)pTask, (void *)pNext));
312
313 pdmacFileAioMgrWakeup((PPDMACEPFILEMGR)ASMAtomicReadPtr((void * volatile *)&pEndpoint->pAioMgr));
314
315 return VINF_SUCCESS;
316}
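/**
 * Sketch (illustrative; the real consumer sits in the I/O manager sources):
 * pdmacFileEpAddTask() above pushes onto a lock-free LIFO with a
 * compare-and-swap loop, while pdmacFileEpGetNewTasks() further up detaches
 * the whole chain with one atomic exchange and reverses it to restore FIFO
 * submission order.  A hypothetical consumer loop would look roughly like:
 *
 * @code
 *     PPDMACTASKFILE pTaskHead = pdmacFileEpGetNewTasks(pEndpoint);
 *     while (pTaskHead)
 *     {
 *         PPDMACTASKFILE pCurr = pTaskHead;
 *         pTaskHead = pTaskHead->pNext;
 *         // ... submit pCurr to the host, oldest request first ...
 *     }
 * @endcode
 */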
317
318void pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
319{
320 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser;
321
322 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
323 {
324 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
325 }
326 else
327 {
328 Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0);
329 uint32_t uOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)pTask->DataSeg.cbSeg);
330
331 if (!(uOld - pTask->DataSeg.cbSeg)
332 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
333 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
334 }
335}
336
337int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask,
338 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
339 PCPDMDATASEG paSegments, size_t cSegments,
340 size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer)
341{
342 int rc = VINF_SUCCESS;
343 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
344 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
345 PPDMACEPFILEMGR pAioMgr = pEpFile->pAioMgr;
346
347 Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ)
348 || (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
349
350 Assert((uint32_t)cbTransfer == cbTransfer && (int32_t)cbTransfer >= 0);
351 ASMAtomicWriteS32(&pTaskFile->cbTransferLeft, (int32_t)cbTransfer);
352 ASMAtomicWriteBool(&pTaskFile->fCompleted, false);
353
354 for (unsigned i = 0; i < cSegments; i++)
355 {
356 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
357 AssertPtr(pIoTask);
358
359 pIoTask->pEndpoint = pEpFile;
360 pIoTask->enmTransferType = enmTransfer;
361 pIoTask->Off = off;
362 pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg;
363 pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg;
364 pIoTask->pvUser = pTaskFile;
365 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
366
367 /* Send it off to the I/O manager. */
368 pdmacFileEpAddTask(pEpFile, pIoTask);
369 off += paSegments[i].cbSeg;
370 cbTransfer -= paSegments[i].cbSeg;
371 }
372
373 AssertMsg(!cbTransfer, ("Incomplete transfer %u bytes left\n", cbTransfer));
374
375 if (ASMAtomicReadS32(&pTaskFile->cbTransferLeft) == 0
376 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
377 pdmR3AsyncCompletionCompleteTask(pTask, false);
378 else
379 rc = VINF_AIO_TASK_PENDING;
380
381 return rc;
382}
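/*
 * Worked example (illustrative numbers): a read of cbTransfer = 96 KiB
 * described by two segments of 64 KiB and 32 KiB produces two PDMACTASKFILE
 * requests at offsets off and off + 64 KiB.  cbTransferLeft starts at 98304;
 * each completion subtracts its cbSeg in pdmacFileEpTaskCompleted(), and the
 * request that drives the counter to zero wins the fCompleted exchange and
 * reports the whole PDMASYNCCOMPLETIONTASK as finished.
 */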
383
384/**
385 * Creates a new async I/O manager.
386 *
387 * @returns VBox status code.
388 * @param pEpClass Pointer to the endpoint class data.
389 * @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
390 * @param fFailsafe Flag to force a failsafe manager even if the global flag is not set.
391 */
392int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, bool fFailsafe)
393{
394 int rc = VINF_SUCCESS;
395 PPDMACEPFILEMGR pAioMgrNew;
396
397 LogFlowFunc((": Entered\n"));
398
399 rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
400 if (RT_SUCCESS(rc))
401 {
402 pAioMgrNew->fFailsafe = fFailsafe || pEpClass->fFailsafe;
403
404 rc = RTSemEventCreate(&pAioMgrNew->EventSem);
405 if (RT_SUCCESS(rc))
406 {
407 rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock);
408 if (RT_SUCCESS(rc))
409 {
410 rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent);
411 if (RT_SUCCESS(rc))
412 {
413 /* Init the rest of the manager. */
414 if (!pAioMgrNew->fFailsafe)
415 rc = pdmacFileAioMgrNormalInit(pAioMgrNew);
416
417 if (RT_SUCCESS(rc))
418 {
419 pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING;
420
421 rc = RTThreadCreateF(&pAioMgrNew->Thread,
422 pAioMgrNew->fFailsafe
423 ? pdmacFileAioMgrFailsafe
424 : pdmacFileAioMgrNormal,
425 pAioMgrNew,
426 0,
427 RTTHREADTYPE_IO,
428 0,
429 "AioMgr%d-%s", pEpClass->cAioMgrs,
430 pAioMgrNew->fFailsafe
431 ? "F"
432 : "N");
433 if (RT_SUCCESS(rc))
434 {
435 /* Link it into the list. */
436 RTCritSectEnter(&pEpClass->CritSect);
437 pAioMgrNew->pNext = pEpClass->pAioMgrHead;
438 if (pEpClass->pAioMgrHead)
439 pEpClass->pAioMgrHead->pPrev = pAioMgrNew;
440 pEpClass->pAioMgrHead = pAioMgrNew;
441 pEpClass->cAioMgrs++;
442 RTCritSectLeave(&pEpClass->CritSect);
443
444 *ppAioMgr = pAioMgrNew;
445
446 Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread)));
447 return VINF_SUCCESS;
448 }
449 pdmacFileAioMgrNormalDestroy(pAioMgrNew);
450 }
451 RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent);
452 }
453 RTSemEventDestroy(pAioMgrNew->EventSemBlock);
454 }
455 RTSemEventDestroy(pAioMgrNew->EventSem);
456 }
457 MMR3HeapFree(pAioMgrNew);
458 }
459
460 LogFlowFunc((": Leave rc=%Rrc\n", rc));
461
462 return rc;
463}
464
465/**
466 * I/O refresh timer callback.
467 */
468static void pdmacFileBwRefresh(PVM pVM, PTMTIMER pTimer, void *pvUser)
469{
470 PPDMACFILEBWMGR pBwMgr = (PPDMACFILEBWMGR)pvUser;
471
472 LogFlowFunc(("pVM=%p pTimer=%p pvUser=%p\n", pVM, pTimer, pvUser));
473
474 /* Reset the counter, growing the maximum if allowed and needed. */
475 bool fIncreaseNeeded = ASMAtomicReadBool(&pBwMgr->fVMTransferLimitReached);
476
477 if ( fIncreaseNeeded
478 && pBwMgr->cbVMTransferPerSecStart < pBwMgr->cbVMTransferPerSecMax)
479 {
480 pBwMgr->cbVMTransferPerSecStart = RT_MIN(pBwMgr->cbVMTransferPerSecMax, pBwMgr->cbVMTransferPerSecStart + pBwMgr->cbVMTransferPerSecStep);
481 LogFlow(("AIOMgr: Increasing maximum bandwidth to %u bytes/sec\n", pBwMgr->cbVMTransferPerSecStart));
482 }
483
484 /* Update */
485 ASMAtomicWriteU32(&pBwMgr->cbVMTransferAllowed, pBwMgr->cbVMTransferPerSecStart);
486 ASMAtomicWriteBool(&pBwMgr->fVMTransferLimitReached, false);
487
488 /* Arm the timer */
489 TMTimerSetMillies(pTimer, 1000);
490}
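/*
 * Worked example (illustrative, using the defaults queried in
 * pdmacFileBwMgrInitialize() below): with VMTransferPerSecStart = 1 MiB and
 * VMTransferPerSecStep = 1 MiB, every second in which the limit was reached
 * raises the per-second budget by 1 MiB on the next tick (2 MiB/s, 3 MiB/s,
 * ...) until VMTransferPerSecMax caps it; a second without pressure simply
 * resets cbVMTransferAllowed to the current budget.
 */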
491
492/**
493 * Destroys an async I/O manager.
494 *
495 * @returns nothing.
496 * @param pAioMgr The async I/O manager to destroy.
497 */
498static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
499{
500 int rc = pdmacFileAioMgrShutdown(pAioMgr);
501 AssertRC(rc);
502
503 /* Unlink from the list. */
504 rc = RTCritSectEnter(&pEpClassFile->CritSect);
505 AssertRC(rc);
506
507 PPDMACEPFILEMGR pPrev = pAioMgr->pPrev;
508 PPDMACEPFILEMGR pNext = pAioMgr->pNext;
509
510 if (pPrev)
511 pPrev->pNext = pNext;
512 else
513 pEpClassFile->pAioMgrHead = pNext;
514
515 if (pNext)
516 pNext->pPrev = pPrev;
517
518 pEpClassFile->cAioMgrs--;
519 rc = RTCritSectLeave(&pEpClassFile->CritSect);
520 AssertRC(rc);
521
522 /* Free the resources. */
523 RTCritSectDelete(&pAioMgr->CritSectBlockingEvent);
524 RTSemEventDestroy(pAioMgr->EventSem);
525 if (!pAioMgr->fFailsafe)
526 pdmacFileAioMgrNormalDestroy(pAioMgr);
527
528 MMR3HeapFree(pAioMgr);
529}
530
531static int pdmacFileBwMgrInitialize(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile,
532 PCFGMNODE pCfgNode, PPPDMACFILEBWMGR ppBwMgr)
533{
534 int rc = VINF_SUCCESS;
535 PPDMACFILEBWMGR pBwMgr = NULL;
536
537 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
538 sizeof(PDMACFILEBWMGR),
539 (void **)&pBwMgr);
540 if (RT_SUCCESS(rc))
541 {
542 /* Init I/O flow control. */
543 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecMax", &pBwMgr->cbVMTransferPerSecMax, UINT32_MAX);
544 AssertLogRelRCReturn(rc, rc);
545 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecStart", &pBwMgr->cbVMTransferPerSecStart, _1M);
546 AssertLogRelRCReturn(rc, rc);
547 rc = CFGMR3QueryU32Def(pCfgNode, "VMTransferPerSecStep", &pBwMgr->cbVMTransferPerSecStep, _1M);
548 AssertLogRelRCReturn(rc, rc);
549
550 pBwMgr->cbVMTransferAllowed = pBwMgr->cbVMTransferPerSecStart;
551
552 /* Init the refresh timer */
553 rc = TMR3TimerCreateInternal(pEpClassFile->Core.pVM,
554 TMCLOCK_REAL,
555 pdmacFileBwRefresh,
556 pBwMgr,
557 "AsyncCompletionFile-BW-Refresh",
558 &pBwMgr->pBwRefreshTimer);
559 if (RT_SUCCESS(rc))
560 *ppBwMgr = pBwMgr;
561 else
562 MMR3HeapFree(pBwMgr);
563 }
564
565 return rc;
566}
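/*
 * Configuration sketch (illustrative): the bandwidth manager reads three
 * optional keys, all in bytes per second, from the CFGM node handed to this
 * endpoint class; where that node sits in the configuration tree is not
 * visible in this file.
 *
 *     "VMTransferPerSecMax"    - hard ceiling        (default UINT32_MAX)
 *     "VMTransferPerSecStart"  - initial budget      (default 1 MiB)
 *     "VMTransferPerSecStep"   - ramp-up increment   (default 1 MiB)
 */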
567
568static void pdmacFileBwMgrDestroy(PPDMACFILEBWMGR pBwMgr)
569{
570 TMR3TimerDestroy(pBwMgr->pBwRefreshTimer);
571 MMR3HeapFree(pBwMgr);
572}
573
574static void pdmacFileBwRef(PPDMACFILEBWMGR pBwMgr)
575{
576 pBwMgr->cRefs++;
577 if (pBwMgr->cRefs == 1)
578 TMTimerSetMillies(pBwMgr->pBwRefreshTimer, 1000); /* 1sec update interval */
579}
580
581static void pdmacFileBwUnref(PPDMACFILEBWMGR pBwMgr)
582{
583 Assert(pBwMgr->cRefs > 0);
584 pBwMgr->cRefs--;
585 if (!pBwMgr->cRefs)
586 TMTimerStop(pBwMgr->pBwRefreshTimer);
587}
588
589bool pdmacFileBwMgrIsTransferAllowed(PPDMACFILEBWMGR pBwMgr, uint32_t cbTransfer)
590{
591 bool fAllowed = false;
592
593 LogFlowFunc(("pBwMgr=%p cbTransfer=%u\n", pBwMgr, cbTransfer));
594
595 uint32_t cbOld = ASMAtomicSubU32(&pBwMgr->cbVMTransferAllowed, cbTransfer);
596 if (RT_LIKELY(cbOld >= cbTransfer))
597 fAllowed = true;
598 else
599 {
600 /* We are out of resources. */
601 ASMAtomicAddU32(&pBwMgr->cbVMTransferAllowed, cbTransfer);
602 ASMAtomicXchgBool(&pBwMgr->fVMTransferLimitReached, true);
603 }
604
605 LogFlowFunc(("fAllowed=%RTbool\n", fAllowed));
606
607 return fAllowed;
608}
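/**
 * Usage sketch (hypothetical caller; the real call sites are in the I/O
 * manager sources, not in this file): a request is only submitted when the
 * remaining per-second budget covers it, otherwise it stays queued until
 * pdmacFileBwRefresh() replenishes cbVMTransferAllowed.
 *
 * @code
 *     if (pdmacFileBwMgrIsTransferAllowed(pEpFile->pBwMgr, (uint32_t)pTask->DataSeg.cbSeg))
 *     {
 *         // ... hand the request to the host ...
 *     }
 *     else
 *     {
 *         // ... park the task and retry after the next one-second refresh ...
 *     }
 * @endcode
 */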
609
610static int pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)
611{
612 int rc = VINF_SUCCESS;
613 RTFILEAIOLIMITS AioLimits; /**< Async I/O limitations. */
614
615 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
616
617 rc = RTFileAioGetLimits(&AioLimits);
618#ifdef DEBUG
619 if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK"))
620 rc = VERR_ENV_VAR_NOT_FOUND;
621#endif
622 if (RT_FAILURE(rc))
623 {
624 LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to failsafe manager\n",
625 rc));
626 pEpClassFile->fFailsafe = true;
627 }
628 else
629 {
630 pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
631 pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax;
632
633 /* The user can force the failsafe manager. */
634 rc = CFGMR3QueryBoolDef(pCfgNode, "UseFailsafeIo", &pEpClassFile->fFailsafe, false);
635 AssertLogRelRCReturn(rc, rc);
636
637 if (pEpClassFile->fFailsafe)
638 LogRel(("AIOMgr: Failsafe I/O was requested by user\n"));
639 }
640
641 /* Init critical section. */
642 rc = RTCritSectInit(&pEpClassFile->CritSect);
643 if (RT_SUCCESS(rc))
644 {
645 /* Check if the host cache should be used too. */
646#ifndef RT_OS_LINUX
647 rc = CFGMR3QueryBoolDef(pCfgNode, "HostCacheEnabled", &pEpClassFile->fHostCacheEnabled, false);
648 AssertLogRelRCReturn(rc, rc);
649#else
650 /*
651 * Host cache + async I/O is not supported on Linux. If the user enabled the
652 * host cache, log a warning and always disable it.
653 */
654 bool fDummy;
655 rc = CFGMR3QueryBool(pCfgNode, "HostCacheEnabled", &fDummy);
656 if (RT_SUCCESS(rc))
657 LogRel(("AIOMgr: The host cache is not supported with async I/O on Linux\n"));
658
659 pEpClassFile->fHostCacheEnabled = false;
660#endif
661
662 /* Check if the cache was disabled by the user. */
663 rc = CFGMR3QueryBoolDef(pCfgNode, "CacheEnabled", &pEpClassFile->fCacheEnabled, true);
664 AssertLogRelRCReturn(rc, rc);
665
666 if (pEpClassFile->fCacheEnabled)
667 {
668 /* Init cache structure */
669 rc = pdmacFileCacheInit(pEpClassFile, pCfgNode);
670 if (RT_FAILURE(rc))
671 {
672 RTCritSectDelete(&pEpClassFile->CritSect);
673 pEpClassFile->fCacheEnabled = false;
674 LogRel(("AIOMgr: Failed to initialise the cache (rc=%Rrc), caching is disabled\n", rc));
675 }
676 }
677 else
678 LogRel(("AIOMgr: Cache was globally disabled\n"));
679
680 rc = pdmacFileBwMgrInitialize(pEpClassFile, pCfgNode, &pEpClassFile->pBwMgr);
681 if (RT_FAILURE(rc))
682 RTCritSectDelete(&pEpClassFile->CritSect);
683 }
684
685 return rc;
686}
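/*
 * Summary sketch (illustrative) of the keys the class initialisation above
 * consumes from its CFGM node; the node's location in the tree is not shown
 * in this file.
 *
 *     "UseFailsafeIo"     (bool, default false)  - force the failsafe manager
 *     "HostCacheEnabled"  (bool, default false)  - ignored with a warning on Linux
 *     "CacheEnabled"      (bool, default true)   - enable the PDM file cache
 *
 * Builds with DEBUG defined additionally fall back to the failsafe manager
 * when the VBOX_ASYNC_IO_FAILBACK environment variable is set.
 */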
687
688static void pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)
689{
690 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
691
692 /* All endpoints should be closed at this point. */
693 AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n"));
694
695 /* Destroy all left async I/O managers. */
696 while (pEpClassFile->pAioMgrHead)
697 pdmacFileAioMgrDestroy(pEpClassFile, pEpClassFile->pAioMgrHead);
698
699 /* Destroy the cache. */
700 if (pEpClassFile->fCacheEnabled)
701 pdmacFileCacheDestroy(pEpClassFile);
702
703 RTCritSectDelete(&pEpClassFile->CritSect);
704 pdmacFileBwMgrDestroy(pEpClassFile->pBwMgr);
705}
706
707static int pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint,
708 const char *pszUri, uint32_t fFlags)
709{
710 int rc = VINF_SUCCESS;
711 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
712 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
713 bool fUseFailsafeManager = pEpClassFile->fFailsafe;
714
715 AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_CACHING)) == 0,
716 ("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
717
718 unsigned fFileFlags = fFlags & PDMACEP_FILE_FLAGS_READ_ONLY
719 ? RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE
720 : RTFILE_O_READWRITE | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE;
721
722 if (!pEpClassFile->fFailsafe)
723 {
724 fFileFlags |= (RTFILE_O_ASYNC_IO | RTFILE_O_WRITE_THROUGH);
725
726 /*
727 * We only disable the cache if the size of the file is a multiple of 512.
728 * Certain hosts like Windows, Linux and Solaris require that transfer sizes
729 * are aligned to the volume sector size.
730 * If it is not, we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH,
731 * which trashes the host cache but ensures that the host cache never
732 * contains dirty buffers.
733 */
734 RTFILE File = NIL_RTFILE;
735
736 rc = RTFileOpen(&File, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
737 if (RT_SUCCESS(rc))
738 {
739 uint64_t cbSize;
740
741 rc = RTFileGetSize(File, &cbSize);
742 if (RT_SUCCESS(rc) && ((cbSize % 512) == 0))
743 {
744 fFileFlags &= ~RTFILE_O_WRITE_THROUGH;
745
746#if defined(RT_OS_LINUX)
747 AssertMsg(!pEpClassFile->fHostCacheEnabled, ("Host cache + async I/O is not supported on Linux\n"));
748 fFileFlags |= RTFILE_O_NO_CACHE;
749#else
750 if (!pEpClassFile->fHostCacheEnabled)
751 fFileFlags |= RTFILE_O_NO_CACHE;
752#endif
753 }
754
755 pEpFile->cbFile = cbSize;
756
757 RTFileClose(File);
758 }
759 }
760
761 /* Open with final flags. */
762 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
763 if ((rc == VERR_INVALID_FUNCTION) || (rc == VERR_INVALID_PARAMETER))
764 {
765 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
766 pszUri, fFileFlags, rc));
767 /*
768 * Solaris doesn't support directio on ZFS so far. :-\
769 * Trying to enable it returns VERR_INVALID_FUNCTION
770 * (ENOTTY). Remove it and hope for the best.
771 * ZFS supports write throttling in case applications
772 * write more data than can be synced to the disk
773 * without blocking the whole application.
774 *
775 * On Linux we have the same problem with cifs.
776 * We have to disable async I/O there as well because it requires O_DIRECT.
777 */
778 fFileFlags &= ~RTFILE_O_NO_CACHE;
779
780#ifdef RT_OS_LINUX
781 fFileFlags &= ~RTFILE_O_ASYNC_IO;
782 fUseFailsafeManager = true;
783#endif
784
785 /* Open again. */
786 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
787
788 if (RT_FAILURE(rc))
789 {
790 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
791 pszUri, fFileFlags, rc));
792 }
793 }
794
795 if (RT_SUCCESS(rc))
796 {
797 pEpFile->fFlags = fFileFlags;
798
799 rc = RTFileGetSize(pEpFile->File, (uint64_t *)&pEpFile->cbFile);
800 if (RT_SUCCESS(rc) && (pEpFile->cbFile == 0))
801 {
802 /* Could be a block device */
803 rc = RTFileSeek(pEpFile->File, 0, RTFILE_SEEK_END, (uint64_t *)&pEpFile->cbFile);
804 }
805
806 if (RT_SUCCESS(rc))
807 {
808 /* Initialize the segment cache */
809 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
810 sizeof(PDMACTASKFILE),
811 (void **)&pEpFile->pTasksFreeHead);
812 if (RT_SUCCESS(rc))
813 {
814 PPDMACEPFILEMGR pAioMgr = NULL;
815
816 pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead;
817 pEpFile->cTasksCached = 0;
818 pEpFile->pBwMgr = pEpClassFile->pBwMgr;
819 pdmacFileBwRef(pEpFile->pBwMgr);
820
821 if (fUseFailsafeManager)
822 {
823 /* Safe mode. Every file has its own async I/O manager. */
824 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, true);
825 AssertRC(rc);
826 }
827 else
828 {
829 if ( (fFlags & PDMACEP_FILE_FLAGS_CACHING)
830 && (pEpClassFile->fCacheEnabled))
831 {
832 pEpFile->fCaching = true;
833 rc = pdmacFileEpCacheInit(pEpFile, pEpClassFile);
834 if (RT_FAILURE(rc))
835 {
836 LogRel(("AIOMgr: Endpoint for \"%s\" was opened with caching but initializing cache failed. Disabled caching\n", pszUri));
837 pEpFile->fCaching = false;
838 }
839 }
840
841 pAioMgr = pEpClassFile->pAioMgrHead;
842
843 /* Check for an idling non-failsafe manager or create a new one if none is found. */
844 while (pAioMgr && pAioMgr->fFailsafe)
845 pAioMgr = pAioMgr->pNext;
846
847 if (!pAioMgr)
848 {
849 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, false);
850 AssertRC(rc);
851 }
852 }
853
854 pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
855 if (!pEpFile->AioMgr.pTreeRangesLocked)
856 rc = VERR_NO_MEMORY;
857 else
858 {
859 pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
860
861 /* Assign the endpoint to the thread. */
862 rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile);
863 if (RT_FAILURE(rc))
864 {
865 RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
866 MMR3HeapFree(pEpFile->pTasksFreeHead);
867 pdmacFileBwUnref(pEpFile->pBwMgr);
868 }
869 }
870 }
871 }
872
873 if (RT_FAILURE(rc))
874 RTFileClose(pEpFile->File);
875 }
876
877#ifdef VBOX_WITH_STATISTICS
878 if (RT_SUCCESS(rc))
879 {
880 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead,
881 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
882 STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
883 "/PDM/AsyncCompletion/File/%s/Read", RTPathFilename(pEpFile->Core.pszUri));
884
885 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite,
886 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
887 STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
888 "/PDM/AsyncCompletion/File/%s/Write", RTPathFilename(pEpFile->Core.pszUri));
889 }
890#endif
891
892 return rc;
893}
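/*
 * Recap (illustrative) of the flag logic above for a read/write endpoint on a
 * non-failsafe manager: the open starts from
 * RTFILE_O_READWRITE | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE plus
 * RTFILE_O_ASYNC_IO | RTFILE_O_WRITE_THROUGH; if the size probe reports a
 * multiple of 512, write-through is traded for RTFILE_O_NO_CACHE (host cache
 * permitting), and a failing open with VERR_INVALID_FUNCTION or
 * VERR_INVALID_PARAMETER retries without RTFILE_O_NO_CACHE (and, on Linux,
 * without async I/O, falling back to the failsafe manager).
 */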
894
895static int pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
896{
897 AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
898 return VINF_SUCCESS;
899}
900
901static int pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
902{
903 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
904 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
905
906 /* Make sure that all tasks finished for this endpoint. */
907 int rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile);
908 AssertRC(rc);
909
910 /*
911 * If the async I/O manager is in failsafe mode this is the only endpoint
912 * it processes and can thus be destroyed now.
913 */
914 if (pEpFile->pAioMgr->fFailsafe)
915 pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr);
916
917 /* Free cached tasks. */
918 PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead;
919
920 while (pTask)
921 {
922 PPDMACTASKFILE pTaskFree = pTask;
923 pTask = pTask->pNext;
924 MMR3HeapFree(pTaskFree);
925 }
926
927 /* Free the cached data. */
928 if (pEpFile->fCaching)
929 pdmacFileEpCacheDestroy(pEpFile);
930
931 /* Remove from the bandwidth manager */
932 pdmacFileBwUnref(pEpFile->pBwMgr);
933
934 /* Destroy the locked ranges tree now. */
935 RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL);
936
937 RTFileClose(pEpFile->File);
938
939#ifdef VBOX_WITH_STATISTICS
940 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatRead);
941 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatWrite);
942#endif
943
944 return VINF_SUCCESS;
945}
946
947static int pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask,
948 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
949 PCPDMDATASEG paSegments, size_t cSegments,
950 size_t cbRead)
951{
952 int rc = VINF_SUCCESS;
953 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
954
955 STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read);
956
957 if (pEpFile->fCaching)
958 rc = pdmacFileEpCacheRead(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
959 off, paSegments, cSegments, cbRead);
960 else
961 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead,
962 PDMACTASKFILETRANSFER_READ);
963
964 STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read);
965
966 return rc;
967}
968
969static int pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask,
970 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
971 PCPDMDATASEG paSegments, size_t cSegments,
972 size_t cbWrite)
973{
974 int rc = VINF_SUCCESS;
975 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
976
977 if (RT_UNLIKELY(pEpFile->fReadonly))
978 return VERR_NOT_SUPPORTED;
979
980 STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write);
981
982 if (pEpFile->fCaching)
983 rc = pdmacFileEpCacheWrite(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
984 off, paSegments, cSegments, cbWrite);
985 else
986 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite,
987 PDMACTASKFILETRANSFER_WRITE);
988
989 STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write);
990
991 return rc;
992}
993
994static int pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask,
995 PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
996{
997 int rc = VINF_SUCCESS;
998 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
999 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
1000
1001 if (RT_UNLIKELY(pEpFile->fReadonly))
1002 return VERR_NOT_SUPPORTED;
1003
1004 pTaskFile->cbTransferLeft = 0;
1005
1006 if (pEpFile->fCaching)
1007 rc = pdmacFileEpCacheFlush(pEpFile, pTaskFile);
1008 else
1009 {
1010 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
1011 AssertPtr(pIoTask);
1012
1013 pIoTask->pEndpoint = pEpFile;
1014 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
1015 pIoTask->pvUser = pTaskFile;
1016 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1017 pdmacFileEpAddTask(pEpFile, pIoTask);
1018 rc = VINF_AIO_TASK_PENDING;
1019 }
1020
1021 return rc;
1022}
1023
1024static int pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize)
1025{
1026 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1027
1028 *pcbSize = ASMAtomicReadU64(&pEpFile->cbFile);
1029
1030 return VINF_SUCCESS;
1031}
1032
1033const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile =
1034{
1035 /* u32Version */
1036 PDMAC_EPCLASS_OPS_VERSION,
1037 /* pcszName */
1038 "File",
1039 /* enmClassType */
1040 PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,
1041 /* cbEndpointClassGlobal */
1042 sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
1043 /* cbEndpoint */
1044 sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
1045 /* cbTask */
1046 sizeof(PDMASYNCCOMPLETIONTASKFILE),
1047 /* pfnInitialize */
1048 pdmacFileInitialize,
1049 /* pfnTerminate */
1050 pdmacFileTerminate,
1051 /* pfnEpInitialize. */
1052 pdmacFileEpInitialize,
1053 /* pfnEpClose */
1054 pdmacFileEpClose,
1055 /* pfnEpRead */
1056 pdmacFileEpRead,
1057 /* pfnEpWrite */
1058 pdmacFileEpWrite,
1059 /* pfnEpFlush */
1060 pdmacFileEpFlush,
1061 /* pfnEpGetSize */
1062 pdmacFileEpGetSize,
1063 /* u32VersionEnd */
1064 PDMAC_EPCLASS_OPS_VERSION
1065};
1066