VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@ 26812

Last change on this file since 26812 was 26812, checked in by vboxsync, 15 years ago

AsyncCompletion: Don't immediately commit dirty buffers to the endpoint; this reduces the I/O load on the host and improves the I/O performance in the guest for frequently updated cache entries

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 79.4 KB
Line 
1/* $Id: PDMAsyncCompletionFileCache.cpp 26812 2010-02-25 20:55:08Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.215389.xyz. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the 2Q cache algorithm.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
31#include <iprt/types.h>
32#include <iprt/mem.h>
33#include <iprt/path.h>
34#include <VBox/log.h>
35#include <VBox/stam.h>
36
37#include "PDMAsyncCompletionFileInternal.h"
38
39/**
40 * A I/O memory context.
41 */
42typedef struct PDMIOMEMCTX
43{
44 /** Pointer to the scatter/gather list. */
45 PCPDMDATASEG paDataSeg;
46 /** Number of segments. */
47 size_t cSegments;
48 /** Current segment we are in. */
49 unsigned iSegIdx;
50 /** Pointer to the current buffer. */
51 uint8_t *pbBuf;
52 /** Number of bytes left in the current buffer. */
53 size_t cbBufLeft;
54} PDMIOMEMCTX, *PPDMIOMEMCTX;
55
/*
 * Lock ownership checks. They expand to real assertions in strict builds and
 * to empty statements otherwise. The macro arguments are parenthesized so
 * that any pointer expression can be passed safely.
 */
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
83
84/*******************************************************************************
85* Internal Functions *
86*******************************************************************************/
87static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
88
89/**
90 * Decrement the reference counter of the given cache entry.
91 *
92 * @returns nothing.
93 * @param pEntry The entry to release.
94 */
95DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
96{
97 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
98 ASMAtomicDecU32(&pEntry->cRefs);
99}
100
101/**
102 * Increment the reference counter of the given cache entry.
103 *
104 * @returns nothing.
105 * @param pEntry The entry to reference.
106 */
107DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
108{
109 ASMAtomicIncU32(&pEntry->cRefs);
110}
111
112/**
113 * Initialize a I/O memory context.
114 *
115 * @returns nothing
116 * @param pIoMemCtx Pointer to a unitialized I/O memory context.
117 * @param paDataSeg Pointer to the S/G list.
118 * @param cSegments Number of segments in the S/G list.
119 */
120DECLINLINE(void) pdmIoMemCtxInit(PPDMIOMEMCTX pIoMemCtx, PCPDMDATASEG paDataSeg, size_t cSegments)
121{
122 AssertMsg((cSegments > 0) && paDataSeg, ("Trying to initialize a I/O memory context without a S/G list\n"));
123
124 pIoMemCtx->paDataSeg = paDataSeg;
125 pIoMemCtx->cSegments = cSegments;
126 pIoMemCtx->iSegIdx = 0;
127 pIoMemCtx->pbBuf = (uint8_t *)paDataSeg[0].pvSeg;
128 pIoMemCtx->cbBufLeft = paDataSeg[0].cbSeg;
129}
130
131/**
132 * Return a buffer from the I/O memory context.
133 *
134 * @returns Pointer to the buffer
135 * @param pIoMemCtx Pointer to the I/O memory context.
136 * @param pcbData Pointer to the amount of byte requested.
137 * If the current buffer doesn't have enough bytes left
138 * the amount is returned in the variable.
139 */
140DECLINLINE(uint8_t *) pdmIoMemCtxGetBuffer(PPDMIOMEMCTX pIoMemCtx, size_t *pcbData)
141{
142 size_t cbData = RT_MIN(*pcbData, pIoMemCtx->cbBufLeft);
143 uint8_t *pbBuf = pIoMemCtx->pbBuf;
144
145 pIoMemCtx->cbBufLeft -= cbData;
146
147 /* Advance to the next segment if required. */
148 if (!pIoMemCtx->cbBufLeft)
149 {
150 pIoMemCtx->iSegIdx++;
151
152 if (RT_UNLIKELY(pIoMemCtx->iSegIdx == pIoMemCtx->cSegments))
153 {
154 pIoMemCtx->cbBufLeft = 0;
155 pIoMemCtx->pbBuf = NULL;
156 }
157 else
158 {
159 pIoMemCtx->pbBuf = (uint8_t *)pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].pvSeg;
160 pIoMemCtx->cbBufLeft = pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].cbSeg;
161 }
162
163 *pcbData = cbData;
164 }
165 else
166 pIoMemCtx->pbBuf += cbData;
167
168 return pbBuf;
169}
170
#ifdef DEBUG
/**
 * Debug-only consistency check of the global cache byte accounting.
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmacFileCacheValidate(PPDMACFILECACHEGLOBAL pCache)
{
    /* The cache must never hold more data than its configured maximum. */
    AssertMsg(pCache->cbMax >= pCache->cbCached,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently-used-in and frequently-used lists together must account for all cached data. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The list of paged out entries has its own limit. */
    AssertMsg(pCache->cbRecentlyUsedOutMax >= pCache->LruRecentlyUsedOut.cbCached,
              ("Paged out list exceeds maximum\n"));
}
#endif
186
187DECLINLINE(void) pdmacFileCacheLockEnter(PPDMACFILECACHEGLOBAL pCache)
188{
189 RTCritSectEnter(&pCache->CritSect);
190#ifdef DEBUG
191 pdmacFileCacheValidate(pCache);
192#endif
193}
194
195DECLINLINE(void) pdmacFileCacheLockLeave(PPDMACFILECACHEGLOBAL pCache)
196{
197#ifdef DEBUG
198 pdmacFileCacheValidate(pCache);
199#endif
200 RTCritSectLeave(&pCache->CritSect);
201}
202
203DECLINLINE(void) pdmacFileCacheSub(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
204{
205 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
206 pCache->cbCached -= cbAmount;
207}
208
209DECLINLINE(void) pdmacFileCacheAdd(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
210{
211 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
212 pCache->cbCached += cbAmount;
213}
214
215DECLINLINE(void) pdmacFileCacheListAdd(PPDMACFILELRULIST pList, uint32_t cbAmount)
216{
217 pList->cbCached += cbAmount;
218}
219
220DECLINLINE(void) pdmacFileCacheListSub(PPDMACFILELRULIST pList, uint32_t cbAmount)
221{
222 pList->cbCached -= cbAmount;
223}
224
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Checks consistency of a LRU list.
 *
 * Verifies that following the pNext links terminates (an entry linked in
 * twice necessarily forms a cycle), that a forbidden entry is not part of
 * the list and that the last element is the recorded list tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
{
    /*
     * Cycle detection with a slow and a fast cursor. Unlike comparing every
     * element against all of its successors this terminates even when the
     * cycle does not include the list head.
     */
    PPDMACFILECACHEENTRY pSlow = pList->pHead;
    PPDMACFILECACHEENTRY pFast = pList->pHead;

    while (pFast && pFast->pNext)
    {
        pSlow = pSlow->pNext;
        pFast = pFast->pNext->pNext;

        if (pSlow == pFast)
        {
            AssertMsgFailed(("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                             pSlow, pList));
            return; /* Walking the list below would never finish. */
        }
    }

    /* The list is known to be finite now, check the individual elements. */
    for (PPDMACFILECACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        if (!pCurr->pNext)
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
259
260/**
261 * Unlinks a cache entry from the LRU list it is assigned to.
262 *
263 * @returns nothing.
264 * @param pEntry The entry to unlink.
265 */
266static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
267{
268 PPDMACFILELRULIST pList = pEntry->pList;
269 PPDMACFILECACHEENTRY pPrev, pNext;
270
271 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
272
273 AssertPtr(pList);
274
275#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
276 pdmacFileCacheCheckList(pList, NULL);
277#endif
278
279 pPrev = pEntry->pPrev;
280 pNext = pEntry->pNext;
281
282 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
283 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
284
285 if (pPrev)
286 pPrev->pNext = pNext;
287 else
288 {
289 pList->pHead = pNext;
290
291 if (pNext)
292 pNext->pPrev = NULL;
293 }
294
295 if (pNext)
296 pNext->pPrev = pPrev;
297 else
298 {
299 pList->pTail = pPrev;
300
301 if (pPrev)
302 pPrev->pNext = NULL;
303 }
304
305 pEntry->pList = NULL;
306 pEntry->pPrev = NULL;
307 pEntry->pNext = NULL;
308 pdmacFileCacheListSub(pList, pEntry->cbData);
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmacFileCacheCheckList(pList, pEntry);
311#endif
312}
313
314/**
315 * Adds a cache entry to the given LRU list unlinking it from the currently
316 * assigned list if needed.
317 *
318 * @returns nothing.
319 * @param pList List to the add entry to.
320 * @param pEntry Entry to add.
321 */
322static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
323{
324 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
325#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
326 pdmacFileCacheCheckList(pList, NULL);
327#endif
328
329 /* Remove from old list if needed */
330 if (pEntry->pList)
331 pdmacFileCacheEntryRemoveFromList(pEntry);
332
333 pEntry->pNext = pList->pHead;
334 if (pList->pHead)
335 pList->pHead->pPrev = pEntry;
336 else
337 {
338 Assert(!pList->pTail);
339 pList->pTail = pEntry;
340 }
341
342 pEntry->pPrev = NULL;
343 pList->pHead = pEntry;
344 pdmacFileCacheListAdd(pList, pEntry->cbData);
345 pEntry->pList = pList;
346#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
347 pdmacFileCacheCheckList(pList, NULL);
348#endif
349}
350
351/**
352 * Destroys a LRU list freeing all entries.
353 *
354 * @returns nothing
355 * @param pList Pointer to the LRU list to destroy.
356 *
357 * @note The caller must own the critical section of the cache.
358 */
359static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
360{
361 while (pList->pHead)
362 {
363 PPDMACFILECACHEENTRY pEntry = pList->pHead;
364
365 pList->pHead = pEntry->pNext;
366
367 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
368 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
369
370 RTMemPageFree(pEntry->pbData);
371 RTMemFree(pEntry);
372 }
373}
374
375/**
376 * Tries to remove the given amount of bytes from a given list in the cache
377 * moving the entries to one of the given ghosts lists
378 *
379 * @returns Amount of data which could be freed.
380 * @param pCache Pointer to the global cache data.
381 * @param cbData The amount of the data to free.
382 * @param pListSrc The source list to evict data from.
383 * @param pGhostListSrc The ghost list removed entries should be moved to
384 * NULL if the entry should be freed.
385 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
386 * @param ppbBuf Where to store the address of the buffer if an entry with the
387 * same size was found and fReuseBuffer is true.
388 *
389 * @note This function may return fewer bytes than requested because entries
390 * may be marked as non evictable if they are used for I/O at the
391 * moment.
392 */
393static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
394 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
395 bool fReuseBuffer, uint8_t **ppbBuffer)
396{
397 size_t cbEvicted = 0;
398
399 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
400
401 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
402 AssertMsg( !pGhostListDst
403 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
404 ("Destination list must be NULL or the recently used but paged out list\n"));
405
406 if (fReuseBuffer)
407 {
408 AssertPtr(ppbBuffer);
409 *ppbBuffer = NULL;
410 }
411
412 /* Start deleting from the tail. */
413 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
414
415 while ((cbEvicted < cbData) && pEntry)
416 {
417 PPDMACFILECACHEENTRY pCurr = pEntry;
418
419 pEntry = pEntry->pPrev;
420
421 /* We can't evict pages which are currently in progress or dirty but not in progress */
422 if ( !(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
423 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
424 {
425 /* Ok eviction candidate. Grab the endpoint semaphore and check again
426 * because somebody else might have raced us. */
427 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
428 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (!(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
431 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
432 {
433 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
434 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
435 Assert(!pCurr->pbDataReplace);
436
437 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
438
439 if (fReuseBuffer && (pCurr->cbData == cbData))
440 {
441 STAM_COUNTER_INC(&pCache->StatBuffersReused);
442 *ppbBuffer = pCurr->pbData;
443 }
444 else if (pCurr->pbData)
445 RTMemPageFree(pCurr->pbData);
446
447 pCurr->pbData = NULL;
448 cbEvicted += pCurr->cbData;
449
450 pdmacFileCacheEntryRemoveFromList(pCurr);
451 pdmacFileCacheSub(pCache, pCurr->cbData);
452
453 if (pGhostListDst)
454 {
455 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
456
457 PPDMACFILECACHEENTRY pGhostEntFree = pGhostListDst->pTail;
458
459 /* We have to remove the last entries from the paged out list. */
460 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
461 && pGhostEntFree)
462 {
463 PPDMACFILECACHEENTRY pFree = pGhostEntFree;
464 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
465
466 pGhostEntFree = pGhostEntFree->pPrev;
467
468 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
469
470 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
471 {
472 pdmacFileCacheEntryRemoveFromList(pFree);
473
474 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
475 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
476 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
477
478 RTMemFree(pFree);
479 }
480
481 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
482 }
483
484 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
485 {
486 /* Couldn't remove enough entries. Delete */
487 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
488 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
489 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
490
491 RTMemFree(pCurr);
492 }
493 else
494 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
495 }
496 else
497 {
498 /* Delete the entry from the AVL tree it is assigned to. */
499 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
500 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
501 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
502
503 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
504 RTMemFree(pCurr);
505 }
506 }
507
508 }
509 else
510 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
511 }
512
513 return cbEvicted;
514}
515
516static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
517{
518 size_t cbRemoved = 0;
519
520 if ((pCache->cbCached + cbData) < pCache->cbMax)
521 return true;
522 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
523 {
524 /* Try to evict as many bytes as possible from A1in */
525 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
526 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
527
528 /*
529 * If it was not possible to remove enough entries
530 * try the frequently accessed cache.
531 */
532 if (cbRemoved < cbData)
533 {
534 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
535
536 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
537 NULL, fReuseBuffer, ppbBuffer);
538 }
539 }
540 else
541 {
542 /* We have to remove entries from frequently access list. */
543 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
544 NULL, fReuseBuffer, ppbBuffer);
545 }
546
547 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
548 return (cbRemoved >= cbData);
549}
550
551/**
552 * Initiates a read I/O task for the given entry.
553 *
554 * @returns nothing.
555 * @param pEntry The entry to fetch the data to.
556 */
557static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
558{
559 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
560
561 /* Make sure no one evicts the entry while it is accessed. */
562 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
563
564 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
565 AssertPtr(pIoTask);
566
567 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
568
569 pIoTask->pEndpoint = pEntry->pEndpoint;
570 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
571 pIoTask->Off = pEntry->Core.Key;
572 pIoTask->DataSeg.cbSeg = pEntry->cbData;
573 pIoTask->DataSeg.pvSeg = pEntry->pbData;
574 pIoTask->pvUser = pEntry;
575 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
576
577 /* Send it off to the I/O manager. */
578 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
579}
580
581/**
582 * Initiates a write I/O task for the given entry.
583 *
584 * @returns nothing.
585 * @param pEntry The entry to read the data from.
586 */
587static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
588{
589 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
590
591 /* Make sure no one evicts the entry while it is accessed. */
592 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
593
594 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
595 AssertPtr(pIoTask);
596
597 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
598
599 pIoTask->pEndpoint = pEntry->pEndpoint;
600 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
601 pIoTask->Off = pEntry->Core.Key;
602 pIoTask->DataSeg.cbSeg = pEntry->cbData;
603 pIoTask->DataSeg.pvSeg = pEntry->pbData;
604 pIoTask->pvUser = pEntry;
605 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
606 ASMAtomicIncU32(&pEntry->pEndpoint->DataCache.cWritesOutstanding);
607
608 /* Send it off to the I/O manager. */
609 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
610}
611
612/**
613 * Commit a single dirty entry to the endpoint
614 *
615 * @returns nothing
616 * @param pEntry The entry to commit.
617 */
618static void pdmacFileCacheEntryCommit(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
619{
620 NOREF(pEndpointCache);
621 AssertMsg( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
622 && !(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
623 ("Invalid flags set for entry %#p\n", pEntry));
624
625 pdmacFileCacheWriteToEndpoint(pEntry);
626}
627
628/**
629 * Commit all dirty entries for a single endpoint.
630 *
631 * @returns nothing.
632 * @param pEndpointCache The endpoint cache to commit.
633 */
634static void pdmacFileCacheEndpointCommit(PPDMACFILEENDPOINTCACHE pEndpointCache)
635{
636 uint32_t cbCommitted = 0;
637 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
638
639 if (!RTListIsEmpty(&pEndpointCache->ListDirtyNotCommitted))
640 {
641 PPDMACFILECACHEENTRY pEntry = RTListNodeGetFirst(&pEndpointCache->ListDirtyNotCommitted,
642 PDMACFILECACHEENTRY,
643 NodeNotCommitted);
644
645 while (!RTListNodeIsLast(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
646 {
647 PPDMACFILECACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMACFILECACHEENTRY,
648 NodeNotCommitted);
649 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
650 cbCommitted += pEntry->cbData;
651 RTListNodeRemove(&pEntry->NodeNotCommitted);
652 pEntry = pNext;
653 }
654
655 /* Commit the last endpoint */
656 Assert(RTListNodeIsLast(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
657 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
658 RTListNodeRemove(&pEntry->NodeNotCommitted);
659 AssertMsg(RTListIsEmpty(&pEndpointCache->ListDirtyNotCommitted),
660 ("Committed all entries but list is not empty\n"));
661 }
662
663 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
664 AssertMsg(pEndpointCache->pCache->cbDirty >= cbCommitted,
665 ("Number of committed bytes exceeds number of dirty bytes\n"));
666 ASMAtomicSubU32(&pEndpointCache->pCache->cbDirty, cbCommitted);
667}
668
669/**
670 * Commit all dirty entries in the cache.
671 *
672 * @returns nothing.
673 * @param pCache The global cache instance.
674 */
675static void pdmacFileCacheCommitDirtyEntries(PPDMACFILECACHEGLOBAL pCache)
676{
677 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
678
679 if (!fCommitInProgress)
680 {
681 pdmacFileCacheLockEnter(pCache);
682 Assert(!RTListIsEmpty(&pCache->ListEndpoints));
683
684 PPDMACFILEENDPOINTCACHE pEndpointCache = RTListNodeGetFirst(&pCache->ListEndpoints,
685 PDMACFILEENDPOINTCACHE,
686 NodeCacheEndpoint);
687 AssertPtr(pEndpointCache);
688
689 while (!RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint))
690 {
691 pdmacFileCacheEndpointCommit(pEndpointCache);
692
693 pEndpointCache = RTListNodeGetNext(&pEndpointCache->NodeCacheEndpoint, PDMACFILEENDPOINTCACHE,
694 NodeCacheEndpoint);
695 }
696
697 /* Commit the last endpoint */
698 Assert(RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint));
699 pdmacFileCacheEndpointCommit(pEndpointCache);
700
701 pdmacFileCacheLockLeave(pCache);
702 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
703 }
704}
705
706/**
707 * Adds the given entry as a dirty to the cache.
708 *
709 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
710 * @param pEndpointCache The endpoint cache the entry belongs to.
711 * @param pEntry The entry to add.
712 */
713static bool pdmacFileCacheAddDirtyEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
714{
715 bool fDirtyBytesExceeded = false;
716 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
717
718 /* If the commit timer is disabled we commit right away. */
719 if (pCache->u32CommitTimeoutMs == 0)
720 {
721 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
722 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
723 }
724 else if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
725 {
726 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
727 RTListAppend(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
728 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
729
730 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
731 }
732
733 return fDirtyBytesExceeded;
734}
735
736
737/**
738 * Completes a task segment freeing all ressources and completes the task handle
739 * if everything was transfered.
740 *
741 * @returns Next task segment handle.
742 * @param pEndpointCache The endpoint cache.
743 * @param pTaskSeg Task segment to complete.
744 */
745static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
746{
747 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
748
749 uint32_t uOld = ASMAtomicSubS32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
750 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
751 if (!(uOld - pTaskSeg->cbTransfer)
752 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
753 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core, true);
754
755 RTMemFree(pTaskSeg);
756
757 return pNext;
758}
759
760/**
761 * Completion callback for I/O tasks.
762 *
763 * @returns nothing.
764 * @param pTask The completed task.
765 * @param pvUser Opaque user data.
766 */
767static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
768{
769 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
770 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
771 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
772 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
773
774 /* Reference the entry now as we are clearing the I/O in progres flag
775 * which protects the entry till now. */
776 pdmacFileEpCacheEntryRef(pEntry);
777
778 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
779 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
780
781 /* Process waiting segment list. The data in entry might have changed inbetween. */
782 bool fDirty = false;
783 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
784
785 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
786 ("The list tail was not updated correctly\n"));
787 pEntry->pWaitingTail = NULL;
788 pEntry->pWaitingHead = NULL;
789
790 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
791 {
792 AssertMsg(pEndpointCache->cWritesOutstanding > 0, ("Completed write request but outstanding task count is 0\n"));
793 ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
794
795 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
796
797 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
798 {
799 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
800
801 RTMemPageFree(pEntry->pbData);
802 pEntry->pbData = pEntry->pbDataReplace;
803 pEntry->pbDataReplace = NULL;
804 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
805 }
806 else
807 {
808 while (pCurr)
809 {
810 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
811
812 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
813 fDirty = true;
814
815 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
816 }
817 }
818 }
819 else
820 {
821 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
822 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IS_DIRTY | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
823 ("Invalid flags set\n"));
824
825 while (pCurr)
826 {
827 if (pCurr->fWrite)
828 {
829 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
830 fDirty = true;
831 }
832 else
833 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
834
835 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
836 }
837 }
838
839 bool fCommit = false;
840 if (fDirty)
841 fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
842
843 /* Complete a pending flush if all writes have completed */
844 if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
845 {
846 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
847 if (pTaskFlush)
848 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
849 }
850
851 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
852
853 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
854 pdmacFileEpCacheEntryRelease(pEntry);
855
856 if (fCommit)
857 pdmacFileCacheCommitDirtyEntries(pCache);
858}
859
860/**
861 * Commit timer callback.
862 */
863static void pdmacFileCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
864{
865 PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser;
866 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
867
868 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
869
870 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
871 pdmacFileCacheCommitDirtyEntries(pCache);
872
873 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
874 LogFlowFunc(("Entries committed, going to sleep\n"));
875}
876
877/**
878 * Initializies the I/O cache.
879 *
880 * returns VBox status code.
881 * @param pClassFile The global class data for file endpoints.
882 * @param pCfgNode CFGM node to query configuration data from.
883 */
884int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
885{
886 int rc = VINF_SUCCESS;
887 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
888
889 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
890 AssertLogRelRCReturn(rc, rc);
891
892 RTListInit(&pCache->ListEndpoints);
893 pCache->cRefs = 0;
894 pCache->cbCached = 0;
895 pCache->fCommitInProgress = 0;
896 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
897
898 /* Initialize members */
899 pCache->LruRecentlyUsedIn.pHead = NULL;
900 pCache->LruRecentlyUsedIn.pTail = NULL;
901 pCache->LruRecentlyUsedIn.cbCached = 0;
902
903 pCache->LruRecentlyUsedOut.pHead = NULL;
904 pCache->LruRecentlyUsedOut.pTail = NULL;
905 pCache->LruRecentlyUsedOut.cbCached = 0;
906
907 pCache->LruFrequentlyUsed.pHead = NULL;
908 pCache->LruFrequentlyUsed.pTail = NULL;
909 pCache->LruFrequentlyUsed.cbCached = 0;
910
911 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
912 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
913 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
914
915 /** @todo r=aeichner: Experiment to find optimal default values */
916 rc = CFGMR3QueryU32Def(pCfgNode, "CacheCommitIntervalMs", &pCache->u32CommitTimeoutMs, 10000 /* 10sec */);
917 AssertLogRelRCReturn(rc, rc);
918 rc = CFGMR3QueryU32(pCfgNode, "CacheCommitThreshold", &pCache->cbCommitDirtyThreshold);
919 if ( rc == VERR_CFGM_VALUE_NOT_FOUND
920 || rc == VERR_CFGM_NO_PARENT)
921 {
922 /* Start committing after 50% of the cache are dirty */
923 pCache->cbCommitDirtyThreshold = pCache->cbMax / 2;
924 }
925 else
926 return rc;
927
928 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
929 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
930 "/PDM/AsyncCompletion/File/cbMax",
931 STAMUNIT_BYTES,
932 "Maximum cache size");
933 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
934 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
935 "/PDM/AsyncCompletion/File/cbCached",
936 STAMUNIT_BYTES,
937 "Currently used cache");
938 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
939 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
940 "/PDM/AsyncCompletion/File/cbCachedMruIn",
941 STAMUNIT_BYTES,
942 "Number of bytes cached in MRU list");
943 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
944 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
945 "/PDM/AsyncCompletion/File/cbCachedMruOut",
946 STAMUNIT_BYTES,
947 "Number of bytes cached in FRU list");
948 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
949 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
950 "/PDM/AsyncCompletion/File/cbCachedFru",
951 STAMUNIT_BYTES,
952 "Number of bytes cached in FRU ghost list");
953
954#ifdef VBOX_WITH_STATISTICS
955 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
956 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
957 "/PDM/AsyncCompletion/File/CacheHits",
958 STAMUNIT_COUNT, "Number of hits in the cache");
959 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
960 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
961 "/PDM/AsyncCompletion/File/CachePartialHits",
962 STAMUNIT_COUNT, "Number of partial hits in the cache");
963 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
964 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
965 "/PDM/AsyncCompletion/File/CacheMisses",
966 STAMUNIT_COUNT, "Number of misses when accessing the cache");
967 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
968 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
969 "/PDM/AsyncCompletion/File/CacheRead",
970 STAMUNIT_BYTES, "Number of bytes read from the cache");
971 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
972 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
973 "/PDM/AsyncCompletion/File/CacheWritten",
974 STAMUNIT_BYTES, "Number of bytes written to the cache");
975 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
976 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
977 "/PDM/AsyncCompletion/File/CacheTreeGet",
978 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
979 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
980 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
981 "/PDM/AsyncCompletion/File/CacheTreeInsert",
982 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
983 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
984 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
985 "/PDM/AsyncCompletion/File/CacheTreeRemove",
986 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
987 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
988 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
989 "/PDM/AsyncCompletion/File/CacheBuffersReused",
990 STAMUNIT_COUNT, "Number of times a buffer could be reused");
991#endif
992
993 /* Initialize the critical section */
994 rc = RTCritSectInit(&pCache->CritSect);
995
996 if (RT_SUCCESS(rc))
997 {
998 /* Create the commit timer */
999 if (pCache->u32CommitTimeoutMs > 0)
1000 rc = TMR3TimerCreateInternal(pClassFile->Core.pVM, TMCLOCK_REAL,
1001 pdmacFileCacheCommitTimerCallback,
1002 pClassFile,
1003 "Cache-Commit",
1004 &pClassFile->Cache.pTimerCommit);
1005
1006 if (RT_SUCCESS(rc))
1007 {
1008 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
1009 LogRel(("AIOMgr: Cache commit interval is %u ms\n", pCache->u32CommitTimeoutMs));
1010 LogRel(("AIOMgr: Cache commit threshold is %u bytes\n", pCache->cbCommitDirtyThreshold));
1011 return VINF_SUCCESS;
1012 }
1013
1014 RTCritSectDelete(&pCache->CritSect);
1015 }
1016
1017 return rc;
1018}
1019
1020/**
1021 * Destroysthe cache freeing all data.
1022 *
1023 * returns nothing.
1024 * @param pClassFile The global class data for file endpoints.
1025 */
1026void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1027{
1028 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
1029
1030 /* Make sure no one else uses the cache now */
1031 pdmacFileCacheLockEnter(pCache);
1032
1033 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1034 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
1035 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
1036 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
1037
1038 pdmacFileCacheLockLeave(pCache);
1039
1040 RTCritSectDelete(&pCache->CritSect);
1041}
1042
1043/**
1044 * Initializes per endpoint cache data
1045 * like the AVL tree used to access cached entries.
1046 *
1047 * @returns VBox status code.
1048 * @param pEndpoint The endpoint to init the cache for,
1049 * @param pClassFile The global class data for file endpoints.
1050 */
1051int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1052{
1053 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1054
1055 pEndpointCache->pCache = &pClassFile->Cache;
1056 RTListInit(&pEndpointCache->ListDirtyNotCommitted);
1057
1058 int rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
1059 if (RT_SUCCESS(rc))
1060 {
1061 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1062 if (pEndpointCache->pTree)
1063 {
1064 pClassFile->Cache.cRefs++;
1065 RTListAppend(&pClassFile->Cache.ListEndpoints, &pEndpointCache->NodeCacheEndpoint);
1066
1067 /* Arm the timer if this is the first endpoint. */
1068 if ( pClassFile->Cache.cRefs == 1
1069 && pClassFile->Cache.u32CommitTimeoutMs > 0)
1070 rc = TMTimerSetMillies(pClassFile->Cache.pTimerCommit, pClassFile->Cache.u32CommitTimeoutMs);
1071 }
1072 else
1073 rc = VERR_NO_MEMORY;
1074
1075 if (RT_FAILURE(rc))
1076 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1077 }
1078
1079#ifdef VBOX_WITH_STATISTICS
1080 if (RT_SUCCESS(rc))
1081 {
1082 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
1083 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1084 STAMUNIT_COUNT, "Number of deferred writes",
1085 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
1086 }
1087#endif
1088
1089 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1090 return rc;
1091}
1092
1093/**
1094 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1095 *
1096 * @returns IPRT status code.
1097 * @param pNode The node to destroy.
1098 * @param pvUser Opaque user data.
1099 */
1100static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
1101{
1102 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
1103 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
1104 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
1105
1106 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
1107 {
1108 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1109 RTThreadSleep(250);
1110 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1111 }
1112
1113 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
1114 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1115
1116 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1117 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1118
1119 pdmacFileCacheEntryRemoveFromList(pEntry);
1120
1121 if (fUpdateCache)
1122 pdmacFileCacheSub(pCache, pEntry->cbData);
1123
1124 RTMemPageFree(pEntry->pbData);
1125 RTMemFree(pEntry);
1126
1127 return VINF_SUCCESS;
1128}
1129
1130/**
1131 * Destroys all cache ressources used by the given endpoint.
1132 *
1133 * @returns nothing.
1134 * @param pEndpoint The endpoint to the destroy.
1135 */
1136void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1137{
1138 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1139 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1140
1141 /* Make sure nobody is accessing the cache while we delete the tree. */
1142 pdmacFileCacheLockEnter(pCache);
1143 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1144 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
1145 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1146
1147 pCache->cRefs--;
1148 RTListNodeRemove(&pEndpointCache->NodeCacheEndpoint);
1149
1150 if ( !pCache->cRefs
1151 && pCache->u32CommitTimeoutMs > 0)
1152 TMTimerStop(pCache->pTimerCommit);
1153
1154 pdmacFileCacheLockLeave(pCache);
1155
1156 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1157
1158#ifdef VBOX_WITH_STATISTICS
1159 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
1160
1161 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
1162#endif
1163}
1164
1165static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1166{
1167 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1168 PPDMACFILECACHEENTRY pEntry = NULL;
1169
1170 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1171
1172 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1173 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
1174 if (pEntry)
1175 pdmacFileEpCacheEntryRef(pEntry);
1176 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1177
1178 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1179
1180 return pEntry;
1181}
1182
1183static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1184{
1185 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1186 PPDMACFILECACHEENTRY pEntry = NULL;
1187
1188 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1189
1190 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1191 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
1192 if (pEntry)
1193 pdmacFileEpCacheEntryRef(pEntry);
1194 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1195
1196 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1197
1198 return pEntry;
1199}
1200
1201static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1202{
1203 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1204
1205 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1206 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1207 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1208 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1209 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1210 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1211}
1212
1213/**
1214 * Allocates and initializes a new entry for the cache.
1215 * The entry has a reference count of 1.
1216 *
1217 * @returns Pointer to the new cache entry or NULL if out of memory.
1218 * @param pCache The cache the entry belongs to.
1219 * @param pEndoint The endpoint the entry holds data for.
1220 * @param off Start offset.
1221 * @param cbData Size of the cache entry.
1222 * @param pbBuffer Pointer to the buffer to use.
1223 * NULL if a new buffer should be allocated.
1224 * The buffer needs to have the same size of the entry.
1225 */
1226static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1227 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1228 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1229{
1230 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1231
1232 if (RT_UNLIKELY(!pEntryNew))
1233 return NULL;
1234
1235 pEntryNew->Core.Key = off;
1236 pEntryNew->Core.KeyLast = off + cbData - 1;
1237 pEntryNew->pEndpoint = pEndpoint;
1238 pEntryNew->pCache = pCache;
1239 pEntryNew->fFlags = 0;
1240 pEntryNew->cRefs = 1; /* We are using it now. */
1241 pEntryNew->pList = NULL;
1242 pEntryNew->cbData = cbData;
1243 pEntryNew->pWaitingHead = NULL;
1244 pEntryNew->pWaitingTail = NULL;
1245 pEntryNew->pbDataReplace = NULL;
1246 if (pbBuffer)
1247 pEntryNew->pbData = pbBuffer;
1248 else
1249 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1250
1251 if (RT_UNLIKELY(!pEntryNew->pbData))
1252 {
1253 RTMemFree(pEntryNew);
1254 return NULL;
1255 }
1256
1257 return pEntryNew;
1258}
1259
1260/**
1261 * Adds a segment to the waiting list for a cache entry
1262 * which is currently in progress.
1263 *
1264 * @returns nothing.
1265 * @param pEntry The cache entry to add the segment to.
1266 * @param pSeg The segment to add.
1267 */
1268DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1269{
1270 pSeg->pNext = NULL;
1271
1272 if (pEntry->pWaitingHead)
1273 {
1274 AssertPtr(pEntry->pWaitingTail);
1275
1276 pEntry->pWaitingTail->pNext = pSeg;
1277 pEntry->pWaitingTail = pSeg;
1278 }
1279 else
1280 {
1281 Assert(!pEntry->pWaitingTail);
1282
1283 pEntry->pWaitingHead = pSeg;
1284 pEntry->pWaitingTail = pSeg;
1285 }
1286}
1287
1288/**
1289 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1290 * in exclusive mode.
1291 *
1292 * @returns true if the flag in fSet is set and the one in fClear is clear.
1293 * false othwerise.
1294 * The R/W semaphore is only held if true is returned.
1295 *
1296 * @param pEndpointCache The endpoint cache instance data.
1297 * @param pEntry The entry to check the flags for.
1298 * @param fSet The flag which is tested to be set.
1299 * @param fClear The flag which is tested to be clear.
1300 */
1301DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1302 PPDMACFILECACHEENTRY pEntry,
1303 uint32_t fSet, uint32_t fClear)
1304{
1305 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1306 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1307
1308 if (fPassed)
1309 {
1310 /* Acquire the lock and check again becuase the completion callback might have raced us. */
1311 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1312
1313 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1314 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1315
1316 /* Drop the lock if we didn't passed the test. */
1317 if (!fPassed)
1318 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1319 }
1320
1321 return fPassed;
1322}
1323
1324/**
1325 * Copies data to a buffer described by a I/O memory context.
1326 *
1327 * @returns nothing.
1328 * @param pIoMemCtx The I/O memory context to copy the data into.
1329 * @param pbData Pointer to the data data to copy.
1330 * @param cbData Amount of data to copy.
1331 */
1332static void pdmacFileEpCacheCopyToIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1333 uint8_t *pbData,
1334 size_t cbData)
1335{
1336 while (cbData)
1337 {
1338 size_t cbCopy = cbData;
1339 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1340
1341 AssertPtr(pbBuf);
1342
1343 memcpy(pbBuf, pbData, cbCopy);
1344
1345 cbData -= cbCopy;
1346 pbData += cbCopy;
1347 }
1348}
1349
1350/**
1351 * Copies data from a buffer described by a I/O memory context.
1352 *
1353 * @returns nothing.
1354 * @param pIoMemCtx The I/O memory context to copy the data from.
1355 * @param pbData Pointer to the destination buffer.
1356 * @param cbData Amount of data to copy.
1357 */
1358static void pdmacFileEpCacheCopyFromIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1359 uint8_t *pbData,
1360 size_t cbData)
1361{
1362 while (cbData)
1363 {
1364 size_t cbCopy = cbData;
1365 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1366
1367 AssertPtr(pbBuf);
1368
1369 memcpy(pbData, pbBuf, cbCopy);
1370
1371 cbData -= cbCopy;
1372 pbData += cbCopy;
1373 }
1374}
1375
1376/**
1377 * Add a buffer described by the I/O memory context
1378 * to the entry waiting for completion.
1379 *
1380 * @returns nothing.
1381 * @param pEntry The entry to add the buffer to.
1382 * @param pTask Task associated with the buffer.
1383 * @param pIoMemCtx The memory context to use.
1384 * @param OffDiff Offset from the start of the buffer
1385 * in the entry.
1386 * @param cbData Amount of data to wait for onthis entry.
1387 * @param fWrite Flag whether the task waits because it wants to write
1388 * to the cache entry.
1389 */
1390static void pdmacFileEpCacheEntryWaitersAdd(PPDMACFILECACHEENTRY pEntry,
1391 PPDMASYNCCOMPLETIONTASKFILE pTask,
1392 PPDMIOMEMCTX pIoMemCtx,
1393 RTFOFF OffDiff,
1394 size_t cbData,
1395 bool fWrite)
1396{
1397 while (cbData)
1398 {
1399 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1400 size_t cbSeg = cbData;
1401 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1402
1403 pSeg->pTask = pTask;
1404 pSeg->uBufOffset = OffDiff;
1405 pSeg->cbTransfer = cbSeg;
1406 pSeg->pvBuf = pbBuf;
1407 pSeg->fWrite = fWrite;
1408
1409 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1410
1411 cbData -= cbSeg;
1412 OffDiff += cbSeg;
1413 }
1414}
1415
1416/**
1417 * Passthrough a part of a request directly to the I/O manager
1418 * handling the endpoint.
1419 *
1420 * @returns nothing.
1421 * @param pEndpoint The endpoint.
1422 * @param pTask The task.
1423 * @param pIoMemCtx The I/O memory context to use.
1424 * @param offStart Offset to start transfer from.
1425 * @param cbData Amount of data to transfer.
1426 * @param enmTransferType The transfer type (read/write)
1427 */
1428static void pdmacFileEpCacheRequestPassthrough(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1429 PPDMASYNCCOMPLETIONTASKFILE pTask,
1430 PPDMIOMEMCTX pIoMemCtx,
1431 RTFOFF offStart, size_t cbData,
1432 PDMACTASKFILETRANSFER enmTransferType)
1433{
1434 while (cbData)
1435 {
1436 size_t cbSeg = cbData;
1437 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1438 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1439 AssertPtr(pIoTask);
1440
1441 pIoTask->pEndpoint = pEndpoint;
1442 pIoTask->enmTransferType = enmTransferType;
1443 pIoTask->Off = offStart;
1444 pIoTask->DataSeg.cbSeg = cbSeg;
1445 pIoTask->DataSeg.pvSeg = pbBuf;
1446 pIoTask->pvUser = pTask;
1447 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1448
1449 offStart += cbSeg;
1450 cbData -= cbSeg;
1451
1452 /* Send it off to the I/O manager. */
1453 pdmacFileEpAddTask(pEndpoint, pIoTask);
1454 }
1455}
1456
1457/**
1458 * Reads the specified data from the endpoint using the cache if possible.
1459 *
1460 * @returns VBox status code.
1461 * @param pEndpoint The endpoint to read from.
1462 * @param pTask The task structure used as identifier for this request.
1463 * @param off The offset to start reading from.
1464 * @param paSegments Pointer to the array holding the destination buffers.
1465 * @param cSegments Number of segments in the array.
1466 * @param cbRead Number of bytes to read.
1467 */
1468int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1469 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1470 size_t cbRead)
1471{
1472 int rc = VINF_SUCCESS;
1473 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1474 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1475 PPDMACFILECACHEENTRY pEntry;
1476
1477 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1478 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1479
1480 pTask->cbTransferLeft = cbRead;
1481 /* Set to completed to make sure that the task is valid while we access it. */
1482 ASMAtomicWriteBool(&pTask->fCompleted, true);
1483
1484 /* Init the I/O memory context */
1485 PDMIOMEMCTX IoMemCtx;
1486 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1487
1488 while (cbRead)
1489 {
1490 size_t cbToRead;
1491
1492 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1493
1494 /*
1495 * If there is no entry we try to create a new one eviciting unused pages
1496 * if the cache is full. If this is not possible we will pass the request through
1497 * and skip the caching (all entries may be still in progress so they can't
1498 * be evicted)
1499 * If we have an entry it can be in one of the LRU lists where the entry
1500 * contains data (recently used or frequently used LRU) so we can just read
1501 * the data we need and put the entry at the head of the frequently used LRU list.
1502 * In case the entry is in one of the ghost lists it doesn't contain any data.
1503 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1504 */
1505 if (pEntry)
1506 {
1507 RTFOFF OffDiff = off - pEntry->Core.Key;
1508
1509 AssertMsg(off >= pEntry->Core.Key,
1510 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1511 off, pEntry->Core.Key));
1512
1513 AssertPtr(pEntry->pList);
1514
1515 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1516
1517 AssertMsg(off + (RTFOFF)cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1518 ("Buffer of cache entry exceeded off=%RTfoff cbToRead=%d\n",
1519 off, cbToRead));
1520
1521 cbRead -= cbToRead;
1522
1523 if (!cbRead)
1524 STAM_COUNTER_INC(&pCache->cHits);
1525 else
1526 STAM_COUNTER_INC(&pCache->cPartialHits);
1527
1528 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1529
1530 /* Ghost lists contain no data. */
1531 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1532 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1533 {
1534 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1535 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1536 0))
1537 {
1538 /* Entry is deprecated. Read data from the new buffer. */
1539 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbDataReplace + OffDiff, cbToRead);
1540 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1541 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1542 }
1543 else
1544 {
1545 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1546 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1547 PDMACFILECACHE_ENTRY_IS_DIRTY))
1548 {
1549 /* Entry didn't completed yet. Append to the list */
1550 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1551 &IoMemCtx,
1552 OffDiff, cbToRead,
1553 false /* fWrite */);
1554 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1555 }
1556 else
1557 {
1558 /* Read as much as we can from the entry. */
1559 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbData + OffDiff, cbToRead);
1560 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1561 }
1562 }
1563
1564 /* Move this entry to the top position */
1565 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1566 {
1567 pdmacFileCacheLockEnter(pCache);
1568 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1569 pdmacFileCacheLockLeave(pCache);
1570 }
1571 /* Release the entry */
1572 pdmacFileEpCacheEntryRelease(pEntry);
1573 }
1574 else
1575 {
1576 uint8_t *pbBuffer = NULL;
1577
1578 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1579
1580 pdmacFileCacheLockEnter(pCache);
1581 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1582 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1583
1584 /* Move the entry to Am and fetch it to the cache. */
1585 if (fEnough)
1586 {
1587 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1588 pdmacFileCacheAdd(pCache, pEntry->cbData);
1589 pdmacFileCacheLockLeave(pCache);
1590
1591 if (pbBuffer)
1592 pEntry->pbData = pbBuffer;
1593 else
1594 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1595 AssertPtr(pEntry->pbData);
1596
1597 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1598 &IoMemCtx,
1599 OffDiff, cbToRead,
1600 false /* fWrite */);
1601 pdmacFileCacheReadFromEndpoint(pEntry);
1602 /* Release the entry */
1603 pdmacFileEpCacheEntryRelease(pEntry);
1604 }
1605 else
1606 {
1607 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1608 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1609 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1610 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1611 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1612
1613 pdmacFileCacheLockLeave(pCache);
1614
1615 RTMemFree(pEntry);
1616
1617 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1618 &IoMemCtx, off, cbToRead,
1619 PDMACTASKFILETRANSFER_READ);
1620 }
1621 }
1622 }
1623 else
1624 {
1625 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1626 size_t cbToReadAligned;
1627 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1628
1629 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1630 pEntryBestFit ? "" : "No ",
1631 off,
1632 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1633 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1634 pEntryBestFit ? pEntryBestFit->cbData : 0));
1635
1636 if ( pEntryBestFit
1637 && off + (RTFOFF)cbRead > pEntryBestFit->Core.Key)
1638 {
1639 cbToRead = pEntryBestFit->Core.Key - off;
1640 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1641 cbToReadAligned = cbToRead;
1642 }
1643 else
1644 {
1645 /*
1646 * Align the size to a 4KB boundary.
1647 * Memory size is aligned to a page boundary
1648 * and memory is wasted if the size is rahter small.
1649 * (For example reads with a size of 512 bytes.
1650 */
1651 cbToRead = cbRead;
1652 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1653
1654 /* Clip read to file size */
1655 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1656 if (pEntryBestFit)
1657 {
1658 Assert(pEntryBestFit->Core.Key >= off);
1659 cbToReadAligned = RT_MIN(cbToReadAligned, (uint64_t)pEntryBestFit->Core.Key - off);
1660 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1661 }
1662 }
1663
1664 cbRead -= cbToRead;
1665
1666 if (!cbRead)
1667 STAM_COUNTER_INC(&pCache->cMisses);
1668 else
1669 STAM_COUNTER_INC(&pCache->cPartialHits);
1670
1671 uint8_t *pbBuffer = NULL;
1672
1673 pdmacFileCacheLockEnter(pCache);
1674 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1675
1676 if (fEnough)
1677 {
1678 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1679
1680 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1681 AssertPtr(pEntryNew);
1682
1683 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1684 pdmacFileCacheAdd(pCache, cbToReadAligned);
1685 pdmacFileCacheLockLeave(pCache);
1686
1687 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1688
1689 AssertMsg( (off >= pEntryNew->Core.Key)
1690 && (off + (RTFOFF)cbToRead <= pEntryNew->Core.Key + pEntryNew->Core.KeyLast + 1),
1691 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1692 off, pEntryNew->Core.Key));
1693
1694 pdmacFileEpCacheEntryWaitersAdd(pEntryNew, pTask,
1695 &IoMemCtx, 0, cbToRead,
1696 false /* fWrite */);
1697 pdmacFileCacheReadFromEndpoint(pEntryNew);
1698 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1699 }
1700 else
1701 {
1702 pdmacFileCacheLockLeave(pCache);
1703
1704 /*
1705 * There is not enough free space in the cache.
1706 * Pass the request directly to the I/O manager.
1707 */
1708 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1709
1710 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1711 &IoMemCtx, off, cbToRead,
1712 PDMACTASKFILETRANSFER_READ);
1713 }
1714 }
1715 off += cbToRead;
1716 }
1717
1718 ASMAtomicWriteBool(&pTask->fCompleted, false);
1719
1720 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1721 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1722 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1723 else
1724 rc = VINF_AIO_TASK_PENDING;
1725
1726 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1727
1728 return rc;
1729}
1730
1731/**
1732 * Writes the given data to the endpoint using the cache if possible.
1733 *
1734 * @returns VBox status code.
1735 * @param pEndpoint The endpoint to write to.
1736 * @param pTask The task structure used as identifier for this request.
1737 * @param off The offset to start writing to
1738 * @param paSegments Pointer to the array holding the source buffers.
1739 * @param cSegments Number of segments in the array.
1740 * @param cbWrite Number of bytes to write.
1741 */
1742int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1743 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1744 size_t cbWrite)
1745{
1746 int rc = VINF_SUCCESS;
1747 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1748 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1749 PPDMACFILECACHEENTRY pEntry;
1750
1751 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1752 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1753
1754 pTask->cbTransferLeft = cbWrite;
1755 /* Set to completed to make sure that the task is valid while we access it. */
1756 ASMAtomicWriteBool(&pTask->fCompleted, true);
1757
1758 /* Init the I/O memory context */
1759 PDMIOMEMCTX IoMemCtx;
1760 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1761
1762 while (cbWrite)
1763 {
1764 size_t cbToWrite;
1765
1766 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1767
1768 if (pEntry)
1769 {
1770 /* Write the data into the entry and mark it as dirty */
1771 AssertPtr(pEntry->pList);
1772
1773 RTFOFF OffDiff = off - pEntry->Core.Key;
1774
1775 AssertMsg(off >= pEntry->Core.Key,
1776 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1777 off, pEntry->Core.Key));
1778
1779 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1780 cbWrite -= cbToWrite;
1781
1782 if (!cbWrite)
1783 STAM_COUNTER_INC(&pCache->cHits);
1784 else
1785 STAM_COUNTER_INC(&pCache->cPartialHits);
1786
1787 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1788
1789 /* Ghost lists contain no data. */
1790 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1791 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1792 {
1793 /* Check if the buffer is deprecated. */
1794 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1795 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1796 0))
1797 {
1798 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1799 ("Entry is deprecated but not in progress\n"));
1800 AssertPtr(pEntry->pbDataReplace);
1801
1802 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1803
1804 /* Update the data from the write. */
1805 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1806 pEntry->pbDataReplace + OffDiff,
1807 cbToWrite);
1808 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1809 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1810 }
1811 else /* Deprecated flag not set */
1812 {
1813 /* Check if the entry is dirty. */
1814 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1815 PDMACFILECACHE_ENTRY_IS_DIRTY,
1816 0))
1817 {
1818 /* If it is dirty but not in progrss just update the data. */
1819 if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS))
1820 {
1821 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1822 pEntry->pbData + OffDiff,
1823 cbToWrite);
1824 }
1825 else
1826 {
1827 Assert(!pEntry->pbDataReplace);
1828
1829 /* Deprecate the current buffer. */
1830 if (!pEntry->pWaitingHead)
1831 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1832
1833 /* If we are out of memory or have waiting segments
1834 * defer the write. */
1835 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1836 {
1837 /* The data isn't written to the file yet */
1838 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1839 &IoMemCtx,
1840 OffDiff, cbToWrite,
1841 true /* fWrite */);
1842 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1843 }
1844 else /* Deprecate buffer */
1845 {
1846 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1847 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1848
1849 /* Copy the data before the update. */
1850 if (OffDiff)
1851 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1852
1853 /* Copy data behind the update. */
1854 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1855 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1856 pEntry->pbData + OffDiff + cbToWrite,
1857 (pEntry->cbData - OffDiff - cbToWrite));
1858
1859 /* Update the data from the write. */
1860 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1861 pEntry->pbDataReplace + OffDiff,
1862 cbToWrite);
1863 /* We are done here. A new write is initiated if the current request completes. */
1864 }
1865 }
1866
1867 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1868 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1869 }
1870 else /* Dirty bit not set */
1871 {
1872 /*
1873 * Check if a read is in progress for this entry.
1874 * We have to defer processing in that case.
1875 */
1876 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1877 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1878 0))
1879 {
1880 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1881 &IoMemCtx,
1882 OffDiff, cbToWrite,
1883 true /* fWrite */);
1884 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1885 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1886 }
1887 else /* I/O in progress flag not set */
1888 {
1889 /* Write as much as we can into the entry and update the file. */
1890 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1891 pEntry->pbData + OffDiff,
1892 cbToWrite);
1893 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1894
1895 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
1896 if (fCommit)
1897 pdmacFileCacheCommitDirtyEntries(pCache);
1898 }
1899 } /* Dirty bit not set */
1900
1901 /* Move this entry to the top position */
1902 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1903 {
1904 pdmacFileCacheLockEnter(pCache);
1905 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1906 pdmacFileCacheLockLeave(pCache);
1907 } /* Deprecated flag not set. */
1908 }
1909 pdmacFileEpCacheEntryRelease(pEntry);
1910 }
1911 else /* Entry is on the ghost list */
1912 {
1913 uint8_t *pbBuffer = NULL;
1914
1915 pdmacFileCacheLockEnter(pCache);
1916 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1917 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1918
1919 if (fEnough)
1920 {
1921 /* Move the entry to Am and fetch it to the cache. */
1922 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1923 pdmacFileCacheAdd(pCache, pEntry->cbData);
1924 pdmacFileCacheLockLeave(pCache);
1925
1926 if (pbBuffer)
1927 pEntry->pbData = pbBuffer;
1928 else
1929 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1930 AssertPtr(pEntry->pbData);
1931
1932 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1933 &IoMemCtx,
1934 OffDiff, cbToWrite,
1935 true /* fWrite */);
1936 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1937 pdmacFileCacheReadFromEndpoint(pEntry);
1938
1939 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
1940 pdmacFileEpCacheEntryRelease(pEntry);
1941 }
1942 else
1943 {
1944 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1945 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1946 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1947 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1948 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1949
1950 pdmacFileCacheLockLeave(pCache);
1951
1952 RTMemFree(pEntry);
1953 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1954 &IoMemCtx, off, cbToWrite,
1955 PDMACTASKFILETRANSFER_WRITE);
1956 }
1957 }
1958 }
1959 else /* No entry found */
1960 {
1961 /*
1962 * No entry found. Try to create a new cache entry to store the data in and if that fails
1963 * write directly to the file.
1964 */
1965 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1966
1967 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1968 pEntryBestFit ? "B" : "No b",
1969 off,
1970 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1971 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1972 pEntryBestFit ? pEntryBestFit->cbData : 0));
1973
1974 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1975 {
1976 cbToWrite = pEntryBestFit->Core.Key - off;
1977 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1978 }
1979 else
1980 {
1981 if (pEntryBestFit)
1982 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1983
1984 cbToWrite = cbWrite;
1985 }
1986
1987 cbWrite -= cbToWrite;
1988
1989 STAM_COUNTER_INC(&pCache->cMisses);
1990
1991 uint8_t *pbBuffer = NULL;
1992
1993 pdmacFileCacheLockEnter(pCache);
1994 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
1995
1996 if (fEnough)
1997 {
1998 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
1999
2000 PPDMACFILECACHEENTRY pEntryNew;
2001
2002 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
2003 AssertPtr(pEntryNew);
2004
2005 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
2006 pdmacFileCacheAdd(pCache, cbToWrite);
2007 pdmacFileCacheLockLeave(pCache);
2008
2009 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
2010
2011 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2012 pEntryNew->pbData,
2013 cbToWrite);
2014 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2015
2016 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntryNew);
2017 if (fCommit)
2018 pdmacFileCacheCommitDirtyEntries(pCache);
2019 pdmacFileEpCacheEntryRelease(pEntryNew);
2020 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2021 }
2022 else
2023 {
2024 pdmacFileCacheLockLeave(pCache);
2025
2026 /*
2027 * There is not enough free space in the cache.
2028 * Pass the request directly to the I/O manager.
2029 */
2030 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2031
2032 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
2033 &IoMemCtx, off, cbToWrite,
2034 PDMACTASKFILETRANSFER_WRITE);
2035 }
2036 }
2037
2038 off += cbToWrite;
2039 }
2040
2041 ASMAtomicWriteBool(&pTask->fCompleted, false);
2042
2043 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
2044 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
2045 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2046 else
2047 rc = VINF_AIO_TASK_PENDING;
2048
2049 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2050
2051 return rc;
2052}
2053
2054int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
2055{
2056 int rc = VINF_SUCCESS;
2057
2058 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p\n",
2059 pEndpoint, pEndpoint->Core.pszUri, pTask));
2060
2061 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
2062 rc = VERR_RESOURCE_BUSY;
2063 else
2064 {
2065 /* Check for dirty entries in the cache. */
2066 pdmacFileCacheEndpointCommit(&pEndpoint->DataCache);
2067 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
2068 {
2069 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
2070 rc = VINF_AIO_TASK_PENDING;
2071 }
2072 else
2073 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2074 }
2075
2076 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2077 return rc;
2078}
2079
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette