% Expand: a parallel file system built on unmodified NFS servers, evaluated
% with MPI-IO.  Accented letters in author names are written inside their own
% brace group so that BibTeX treats each one as a single letter when sorting
% and generating labels.
@Article{garcia:expand-design,
  author    = {F{\'e}lix Garcia-Carballeira and Alejandro Calderon and
               Jesus Carretero and Javier Fernandez and Jose M. Perez},
  title     = {The Design of the {Expand} Parallel File System},
  journal   = {The International Journal of High Performance Computing Applications},
  year      = {2003},
  volume    = {17},
  number    = {1},
  pages     = {21--38},
  publisher = {Sage Science Press},
  URL       = {http://www.sagepub.co.uk/JournalIssueAbstract.aspx?pid=105593&jiid=515697&jiaid=33121},
  keyword   = {parallel file system, parallel I/O, pario-bib},
  abstract  = {This article describes an implementation of MPI-IO using a new
               parallel file system, called Expand (Expandable Parallel File
               System), which is based on NFS servers. Expand combines
               multiple NFS servers to create a distributed partition where
               files are striped. Expand requires no changes to the NFS
               server and uses RPC operations to provide parallel access to
               the same file. Expand is also independent of the clients,
               because all operations are implemented using RPC and NFS
               protocols. Using this system, we can join heterogeneous
               servers (Linux, Solaris, Windows 2000, etc.) to provide a
               parallel and distributed partition. The article describes the
               design, implementation and evaluation of Expand with MPI-IO.
               This evaluation has been made in Linux clusters and compares
               Expand and PVFS.}
}

% Clusterfile: a parallel file system for clusters with a flexible physical
% file layout; the abstract reports performance comparisons with PVFS.
@Article{isaila:clusterfile,
  author    = {Florin Isaila and Walter F. Tichy},
  title     = {Clusterfile: a flexible physical layout parallel file system},
  journal   = {Concurrency and Computation: Practice and Experience},
  year      = {2003},
  volume    = {15},
  number    = {7/8},
  pages     = {653--679},
  publisher = {Wiley},
  URL       = {http://www3.interscience.wiley.com/cgi-bin/abstract/104524121/ABSTRACT},
  URLpdf    = {http://www3.interscience.wiley.com/cgi-bin/fulltext/104524121/PDFSTART},
  keyword   = {parallel file system, parallel I/O, pario-bib},
  abstract  = {This paper presents Clusterfile, a parallel file system that
               provides parallel file access on a cluster of computers. We
               introduce a file partitioning model that has been used in the
               design of Clusterfile. The model uses a data representation
               that is optimized for multidimensional array partitioning
               while allowing arbitrary partitions. The paper shows how the
               file model can be employed for file partitioning into both
               physical subfiles and logical views. We also present how the
               conversion between two partitions of the same file is
               implemented using a general memory redistribution algorithm.
               We show how we use the algorithm to optimize non-contiguous
               read and write operations. The experimental results include
               performance comparisons with the Parallel Virtual File System
               (PVFS) and an MPI-IO implementation for PVFS.}
}

% Restructuring of Armada processing graphs for data access on computational
% grids.  The note field marks the paper as accepted but not yet in print, so
% the entry has no volume, number or pages; add them once they are known.
@Article{oldfield:restruct,
  author    = {Ron Oldfield and David Kotz},
  title     = {Improving data access for computational grid applications},
  journal   = {Cluster Computing, The Journal of Networks, Software Tools and Applications},
  year      = {2003},
  copyright = {the authors},
  note      = {Accepted for publication},
  keyword   = {parallel I/O, Grid computing, distributed computing, graph algorithms, pario-bib},
  abstract  = {High-performance computing increasingly occurs on
               ``computational grids'' composed of heterogeneous and
               geographically distributed systems of computers, networks, and
               storage devices that collectively act as a single ``virtual''
               computer. A key challenge in this environment is to provide
               efficient access to data distributed across remote data
               servers. Our parallel I/O framework, called Armada, allows
               application and data-set providers to flexibly compose graphs
               of processing modules that describe the distribution,
               application interfaces, and processing required of the dataset
               before computation. Although the framework provides a simple
               programming model for the application programmer and the
               data-set provider, the resulting graph may contain bottlenecks
               that prevent efficient data access. In this paper, we present
               an algorithm used to restructure Armada graphs that
               distributes computation and data flow to improve performance
               in the context of a wide-area computational grid.}
}