diff -r pario/web/bibtex/barve:competitive2.bib pario/web/new/barve:competitive2.bib 12c12,43 < keyword = {disk prefetching, file caching, parallel I/O, pario-bib} --- > keyword = {disk prefetching, file caching, parallel I/O, pario-bib}, > abstract = {We provide a competitive analysis framework for online > prefetching and buffer management algorithms in parallel I/O systems, using a > read-once model of block references. This has widespread applicability to key > I/O-bound applications such as external merging and concurrent playback of > multiple video streams. Two realistic lookahead models, global lookahead and > local lookahead, are defined. Algorithms NOM and GREED based on these two > forms of lookahead are analyzed for shared buffer and distributed buffer > configurations, both of which occur frequently in existing systems. An > important aspect of our work is that we show how to implement both the models > of lookahead in practice using the simple techniques of forecasting and > flushing. \par Given a D-disk parallel I/O system and a globally shared I/O > buffer that can hold up to M disk blocks, we derive a lower bound of > $\Omega(\sqrt{D})$ on the competitive ratio of any deterministic online > prefetching algorithm with O(M) lookahead. NOM is shown to match the lower > bound using global M-block lookahead. In contrast, using only local lookahead > results in an $\Omega(D)$ competitive ratio. When the buffer is distributed > into D portions of M/D blocks each, the algorithm GREED based on local > lookahead is shown to be optimal, and NOM is within a constant factor of > optimal. Thus we provide a theoretical basis for the intuition that global > lookahead is more valuable for prefetching in the case of a shared buffer > configuration whereas it is enough to provide local lookahead in case of the > distributed configuration. 
Finally, we analyze the performance of these > algorithms for reference strings generated by a uniformly-random stochastic > process and we show that they achieve the minimal expected number of I/Os. > These results also give bounds on the worst-case expected performance of > algorithms which employ randomization in the data layout.}, > comment = {See also barve:competitive. They propose two methods for > scheduling prefetch operations in the situation where the access pattern is > largely known in advance, in such a way as to minimize the total number of > parallel I/Os. The two methods are quite straightforward, and yet match the > optimum lower bound for an on-line algorithm.} diff -r pario/web/bibtex/cormen:fft3.bib pario/web/new/cormen:fft3.bib 12a13,29 > abstract = {This paper extends an earlier out-of-core Fast Fourier Transform > (FFT) method for a uniprocessor with the Parallel Disk Model (PDM) to use > multiple processors. Four out-of-core multiprocessor methods are examined. > Operationally, these methods differ in the size of ``mini-butterfly'' computed > in memory and how the data are organized on the disks and in the distributed > memory of the multiprocessor. The methods also perform differing amounts of > I/O and communication. Two of them have the remarkable property that even > though they are computing the FFT on a multiprocessor, all interprocessor > communication occurs outside the mini-butterfly computations; communication > that ordinarily occurs in a butterfly is folded into other data-movement > operations. An analysis program shows that the two methods that use no > butterfly communication usually use less communication overall than the other > methods. The analysis program is fast enough that it can be invoked at run > time to determine which of the four methods uses the least communication. 
One > set of performance results on a small workstation cluster indicates that the > methods without butterfly communication are approximately 9.5\% faster. > Moreover, they are much easier to implement.}, 14c31,34 < in the out-of-core FFT into a single BMMC permutation between} --- > in the out-of-core FFT into a single BMMC permutation between ``super-levels'', > where each super-level involves log(M) stages of the FFT. This usually leads > to less communication and to better overall performance. See also cormen:fft > and cormen:fft2.} diff -r pario/web/bibtex/cortes:cooperative.bib pario/web/new/cortes:cooperative.bib 12c12,29 < pario-bib} --- > pario-bib}, > abstract = {In this paper, we examine some of the important problems observed > in the design of cooperative caches. Solutions to the coherence, > load-balancing and fault-tolerance problems are presented. These solutions > have been implemented as a part of PAFS, a parallel/distributed file system, > and its performance has been compared to the one achieved by xFS. Using the > comparison results, we have observed that the proposed ideas not only solve > the main problems of cooperative caches, but also increase the overall system > performance. Although the solutions presented in this paper were targeted to > a parallel machine, reasonably good results have also been obtained for > networks of workstations.}, > comment = {They make the claim that it is better not to replicate data into > local client caches, rather, it is better to simply make remote read and > write requests to the cached block in whatever memory it may be. That reduces > the overhead (space and time) of replication and coherency, and leads to > better performance. 
They also present a range of parity-based fault-tolerance > mechanisms, and a load-balancing technique that reassigns cache buffers to > cache-manager processes.} diff -r pario/web/bibtex/kandaswamy:hartree-fock.bib pario/web/new/kandaswamy:hartree-fock.bib 9a10 > earlier = {kandaswamy:hartree}, diff -r pario/web/bibtex/kandaswamy:hartree.bib pario/web/new/kandaswamy:hartree.bib 12,13c12,14 < note = {To appear}, < keyword = {verify pages, parallel I/O, scientific computing, pario-bib}, --- > later = {kandaswamy:hartree-fock}, > URL = {http://scxy.tc.cornell.edu/sc97/proceedings/TECH/KANDASWA/INDEX.HTM}, > keyword = {parallel I/O, scientific computing, pario-bib}, 32c33,35 < improvement in the overall application performance with these optimizations.} --- > improvement in the overall application performance with these > optimizations.}, > comment = {No page numbers: proceedings on CDROM and web only.} diff -r pario/web/bibtex/madhyastha:adaptive.bib pario/web/new/madhyastha:adaptive.bib 9a10 > later = {madhyastha:thesis}, 25c26,27 < benchmarks and input/output intensive scientific applications.} --- > benchmarks and input/output intensive scientific applications.}, > comment = {See also madhyastha:thesis, and related papers.} diff -r pario/web/bibtex/madhyastha:classification.bib pario/web/new/madhyastha:classification.bib 11a12 > later = {madhyastha:thesis}, 29c30,31 < much better performance in future runs. See also her paper in SC97.} --- > much better performance in future runs. See also madhyastha:thesis, and > related papers.}