% Parallel-I/O bibliography entries (every entry carries the "pario-bib" keyword).
%
% Conventions used below:
%   - month uses the predefined three-letter macros (jun, dec, ...), unquoted,
%     so each bibliography style can expand or abbreviate them itself.
%   - page ranges use a double hyphen with no surrounding spaces.
%   - words in titles that must keep their capitalisation are braced as whole words.
%   - titles carry no trailing period; the style supplies punctuation.
%   - institution (on non-report entries), copyright, keywords, abstract and
%     comment are annotation fields; the standard styles are expected to ignore them.

% Journal article: adaptive cache coherence protocol for parallel I/O systems.
@Article{carballeira:adaptive,
  author      = {Felix Garcia-Carballeira and Jesus Carretero and Alejandro Calderon and Jose M. Perez and Jose D. Garcia},
  title       = {An adaptive cache coherence protocol specification for parallel input/output systems},
  journal     = {IEEE Transactions on Parallel and Distributed Systems},
  year        = {2004},
  month       = jun,
  volume      = {15},
  number      = {6},
  pages       = {533--545},
  institution = {Univ Carlos III Madrid, Comp Architecture Grp, Madrid 28911, Spain; Univ Carlos III Madrid, Comp Architecture Grp, Madrid 28911, Spain},
  publisher   = {IEEE Computer Society Press},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  URL         = {http://csdl.computer.org/comp/trans/td/2004/06/l0533abs.htm},
  keywords    = {parallel file system, caching, cache coherence, adaptive caching, protocol specification, pario-bib},
  abstract    = {Caching has been intensively used in memory and traditional file systems to improve system performance. However, the use of caching in parallel file systems and I/O libraries has been limited to I/O nodes to avoid cache coherence problems. In this paper, we specify an adaptive cache coherence protocol very suitable for parallel file systems and parallel I/O libraries. This model exploits the use of caching, both at processing and I/O nodes, providing performance increase mechanisms as aggressive prefetching and delayed-write techniques. The cache coherence problem is solved by using a dynamic scheme of cache coherence protocols with different sizes and shapes of granularity. The proposed model is very appropriate for parallel I/O interfaces, as MPI-IO. Performance results, obtained on an IBM SP2, are presented to demonstrate the advantages offered by the cache management methods proposed.}
}

% Conference paper: structured ("datatype") I/O support in PVFS.
@InProceedings{ching:efficient,
  author      = {Avery Ching and Alok Choudhary and Wei-keng Liao and Robert Ross and William Gropp},
  title       = {Efficient structured data access in parallel file systems},
  booktitle   = {Proceedings of the IEEE International Conference on Cluster Computing},
  year        = {2003},
  month       = dec,
  pages       = {326--335},
  institution = {Northwestern Univ, Dept Elect \& Comp Engn, Evanston, IL 60208 USA},
  publisher   = {IEEE Computer Society Press},
  address     = {Hong Kong, China},
  URL         = {http://www.ece.northwestern.edu/~wkliao/SciDAC/Publications/cluster2003.pdf},
  keywords    = {I/O interface, high-level libraries, PVFS, structured data representations, pario-bib},
  abstract    = {Parallel scientific applications store and retrieve very large, structured datasets. Directly supporting these structured accesses is an important step in providing high-performance I/O solutions for these applications. High-level interfaces such as HDF5 and Parallel netCDF provide convenient APIs for accessing structured datasets, and the MPI-IO interface also supports efficient access to structured data. However, parallel file systems do not traditionally support such access. In this work, we present an implementation of structured data access support in the context of the Parallel Virtual File System (PVFS). We call this support "datatype I/O" because of its similarity to MPI datatypes. This support is built by using a reusable datatype-processing component from the MPICH2 MPI implementation. We describe how this component is leveraged to efficiently process structured data representations resulting from MPI-IO operations. We quantitatively assess the solution using three test applications. We also point to further optimizations in the processing path that could be leveraged for even more efficient operation.},
  comment     = {not read, don't have}
}

% Conference paper: packet loss and aggregate TCP flows (SC2002).
@InProceedings{hacker:effects,
  author    = {Thomas J. Hacker and Brian Noble and Brian D. Athey},
  title     = {The Effects of Systemic Packet Loss on Aggregate {TCP} Flows},
  booktitle = {Proceedings of SC2002: High Performance Networking and Computing},
  year      = {2002},
  month     = nov,
  address   = {Baltimore, MD},
  URL       = {http://www-personal.engin.umich.edu/~hacker/papers/SC_2002_full.pdf},
  keywords  = {network congestion, parallel tcp streams, transport protocols, pario-bib}
}

% Conference paper: throughput and fairness with parallel TCP (INFOCOM 2004).
@InProceedings{hacker:fairness,
  author    = {Thomas J. Hacker and Brian Noble and Brian D. Athey},
  title     = {Improving Throughput and Maintaining Fairness using Parallel {TCP}},
  booktitle = {The 23rd Conference on the IEEE Communications Society (INFOCOM)},
  year      = {2004},
  month     = mar,
  publisher = {IEEE Computer Society Press},
  address   = {Hong Kong},
  URL       = {http://www.ieee-infocom.org/2004/Papers/52_1.PDF},
  keywords  = {network congestion, parallel tcp streams, fairness, transport protocols, pario-bib},
  comment   = {Also see earlier hacker:parallel-tcp and hacker:effects}
}

% Conference paper: parallel TCP sockets on a lossy wide-area network (IPDPS 2002).
@InProceedings{hacker:parallel-tcp,
  author    = {Thomas J. Hacker and Brian D. Athey and Brian Noble},
  title     = {The end-to-end performance effects of parallel {TCP} sockets on a lossy wide-area network},
  booktitle = {Proceedings of the International Parallel and Distributed Processing Symposium},
  year      = {2002},
  month     = apr,
  pages     = {434--443},
  publisher = {IEEE Computer Society Press},
  copyright = {(c)2004 IEE},
  address   = {Fort Lauderdale, Florida},
  URL       = {http://www-personal.engin.umich.edu/~hacker/papers/IPDPS.PDF},
  keywords  = {network congestion, parallel tcp streams, transport protocols, pario-bib},
  abstract  = {This paper examines the effects of using parallel TCP flows to improve end-to-end network performance for distributed data intensive applications. A series of transmission experiments were conducted over a wide-area network to assess how parallel flows improve throughput, and to understand the number of flows necessary to improve throughput while avoiding congestion. An empirical throughput expression for parallel flows based on experimental data is presented, and guidelines for the use of parallel flows are discussed. (45 refs.)}
}

% Conference paper: view I/O, a non-contiguous parallel I/O technique.
@InProceedings{isaila:viewio,
  author    = {Florin Isaila and Walter F. Tichy},
  title     = {View {I/O}: improving the performance of non-contiguous {I/O}},
  booktitle = {IEEE International Conference on Cluster Computing},
  year      = {2003},
  month     = dec,
  pages     = {336--343},
  publisher = {IEEE Computer Society Press},
  address   = {Hong Kong, China},
  URL       = {http://www.ipd.uka.de/~florin/Publications/mypaper.pdf},
  keywords  = {non-contiguous I/O, parallel file structure, pario-bib},
  abstract  = {This paper presents view I/O, a non-contiguous parallel I/O technique. We show that the linear file model may be an unsuitable abstraction for non-contiguous I/O optimizations. Additionally, the poor cooperation between a file system and an I/O library like MPI-IO may drastically affect the performance. View I/O has detailed knowledge about parallel structure of a file and about the potential access pattern and exploits it in order to improve performance. The access overhead is reduced by using a strategy "declare once, use several times" and by file off-set compaction. We compare and contrast view I/O with other non-contiguous I/O methods. Our measurements on a cluster of computers indicate a significant performance improvement over other approaches. (15 refs.)}
}

% Journal article: application data environment for large-scale scientific computations.
@Article{shen:data-management,
  author   = {X. H. Shen and W. K. Liao and A. Choudhary and G. Memik and M. Kandemir},
  title    = {A high-performance application data environment for large-scale scientific computations},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2003},
  month    = dec,
  volume   = {14},
  number   = {12},
  pages    = {1262--1274},
  keywords = {data management, scientific applications, workflow, parallel file systems, pario-bib},
  abstract = {Effective high-level data management is becoming an important issue with more and more scientific applications manipulating huge amounts of secondary-storage and tertiary-storage data using parallel processors. A major problem facing the current solutions to this data management problem is that these solutions either require a deep understanding of specific data storage architectures and file layouts to obtain the best performance (as in high-performance storage management systems and parallel file systems), or they sacrifice significant performance in exchange for ease-of-use and portability (as in traditional database management systems). We discuss the design, implementation, and evaluation of a novel application development environment for scientific computations. This environment includes a number of components that make it easy for the programmers to code and run their applications without much programming effort and, at the same time, to harness the available computational and storage power on parallel architectures. (39 refs.)}
}

% Journal article: performance analysis of real disk arrays.
@Article{varki:issues,
  author      = {E. Varki and A. Merchant and J. Z. Xu and X. Z. Qiu},
  title       = {Issues and challenges in the performance analysis of real disk arrays},
  journal     = {IEEE Transactions on Parallel and Distributed Systems},
  year        = {2004},
  month       = jun,
  volume      = {15},
  number      = {6},
  pages       = {559--574},
  institution = {Univ New Hampshire, Dept Comp Sci, Nesmith Hall, Durham, NH 03824 USA; Univ New Hampshire, Dept Comp Sci, Durham, NH 03824 USA; Hewlett Packard Labs, Storage Syst Dept, Palo Alto, CA 94304 USA; Falconstor Software Inc, Melville, NY 11747 USA},
  publisher   = {IEEE Computer Society Press},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  keywords    = {performance analysis, disk arrays, performance modeling, pario-bib},
  abstract    = {The performance modeling and analysis of disk arrays is challenging due to the presence of multiple disks, large array caches, and sophisticated array controllers. Moreover, storage manufacturers may not reveal the internal algorithms implemented in their devices, so real disk arrays are effectively black-boxes. We use standard performance techniques to develop an integrated performance model that incorporates some of the complexities of real disk arrays. We show how measurement data and baseline performance models can be used to extract information about the various features implemented in a disk array. In this process, we identify areas for future research in the performance analysis of real disk arrays.}
}

% Conference paper: noncontiguous access in PVFS over InfiniBand.
@InProceedings{wu:noncontiguous,
  author      = {Jiesheng Wu and Pete Wyckoff and Dhabaleswar Panda},
  title       = {Supporting efficient noncontiguous access in {PVFS} over {InfiniBand}},
  booktitle   = {Proceedings of the IEEE International Conference on Cluster Computing},
  year        = {2003},
  month       = dec,
  pages       = {344--351},
  institution = {Ohio State Univ, Columbus, OH 43210 USA},
  publisher   = {IEEE Computer Society Press},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  address     = {Hong Kong, China},
  URL         = {http://nowlab.cis.ohio-state.edu/projects/mpi-iba/publication/wuj-cluster03.pdf},
  keywords    = {noncontiguous access patterns, PVFS, Infiniband, RDMA, pario-bib},
  abstract    = {Noncontiguous I/O access is the main access pattern in many scientific applications. Noncontiguity exists both in access to files and in access to target memory regions on the client. This characteristic imposes a requirement of native noncontiguous I/O access support in cluster file systems for high performance. In this paper we address noncontiguous data transmission between the client and the I/O server in cluster file systems over a high performance network. We propose a novel approach, RDMA Gather/Scatter, to transfer noncontiguous data for such I/O accesses. We also propose a new scheme, Optimistic Group Registration, to reduce memory registration costs associated with this approach. We have designed and incorporated this approach in a version of PVFS over InfiniBand. Through a range of PVFS and MPI-IO micro-benchmarks, and the NAS BTIO benchmark, we demonstrate that our approach attains significant performance gains compared to other existing approaches.}
}