% Parallel-I/O bibliography entries; every entry in this section carries the
% keyword pario-bib and a publication year of 2004.
%
% Conventions used by the entries below:
%   * Papers that appeared in Lecture Notes in Computer Science are recorded
%     as Article entries with journal = LNCS and volume = the LNCS number.
%     The originating conference is kept in booktitle, its editors in editor.
%   * month uses the predefined three-letter macros (jan ... dec), unquoted,
%     so the bibliography style decides how the month is printed.
%   * Name lists (author, editor) separate people with the word "and".
%   * Where the URL is a DOI resolver link, the bare DOI is also given in doi.
%   * copyright, institution, keywords and abstract are annotation fields.
%     NOTE(review): the standard styles are expected to ignore them; confirm
%     for any custom style in use.

@Article{cecchet:raidb,
  author       = {Emmanuel Cecchet and Julie Marguerite and Willy Zwaenepoel},
  title        = {Partial replication: Achieving scalability in redundant
                  arrays of inexpensive databases},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {7th International Conference on Principles of Distributed
                  Systems (OPODIS 2003); December 10-13, 2003; MARTINIQUE},
  editor       = {Papatriantafilou, M. and Hunel, P.},
  year         = {2004},
  month        = jul,
  volume       = {3144},
  pages        = {58--70},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://springerlink.metapress.com/link.asp?id=kay13m7clgg75utk},
  keywords     = {replication strategies, RAIDb, database, pario-bib},
  abstract     = {Clusters of workstations become more and more popular to
                  power data server applications such as large scale Web sites
                  or e-Commerce applications. There has been much research on
                  scaling the front tiers (web servers and application
                  servers) using clusters, but databases usually remain on
                  large dedicated SMP machines. In this paper, we focus on the
                  database tier using clusters of commodity hardware. Our
                  approach consists of studying different replication
                  strategies to achieve various degree of performance and
                  fault tolerance. Redundant Array of Inexpensive Databases
                  (RAIDb) is to databases what RAID is to disks. In this
                  paper, we focus on RAIDb-1 that offers full replication and
                  RAIDb-2 that introduces partial replication, in which the
                  user can define the degree of replication of each database
                  table. We present a Java implementation of RAIDb called
                  Clustered JDBC or C-JDBC. C-JDBC achieves both database
                  performance scalability and high availability at the
                  middleware level without changing existing applications. We
                  show, using the TPC-W benchmark, that partial replication
                  (RAIDb-2) can offer better performance scalability (up to
                  25\%) than full replication by allowing fine-grain control
                  on replication. Distributing and restricting the replication
                  of frequently written tables to a small set of backends
                  reduces I/O usage and improves CPU utilization of each
                  cluster node.}
}

@Article{feng:io-response,
  author       = {Dan Feng and Hong Jiang and Yifeng Zhu},
  title        = {{I/O} response time in a fault-tolerant parallel virtual
                  file system},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {IFIP International Conference on Network and Parallel
                  Computing; October 18-20, 2004; Wuhan, PEOPLES R CHINA},
  editor       = {Jin, H. and Gao, G. R. and Xu, Z. W. and Chen, H.},
  year         = {2004},
  month        = oct,
  volume       = {3222},
  pages        = {248--251},
  institution  = {Huazhong Univ Sci \& Technol, Coll Comp, Minist Educ, Key
                  Lab Data Storage Syst, Wuhan 430074, Peoples R China; Univ
                  Nebraska, Dept Comp Sci \& Engn, Lincoln, NE 68588 USA},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 The Thomson Corporation},
  URL          = {http://springerlink.metapress.com/link.asp?id=484ru5hgyxegr5r2},
  keywords     = {fault-tolerance, PVFS, performance analysis, pario-bib},
  abstract     = {A fault tolerant parallel virtual file system is designed
                  and implemented to provide high I/O performance and high
                  reliability. A queuing model is used to analyze in detail
                  the average response time when multiple clients access the
                  system. The results show that I/O response time is with a
                  function of several operational parameters. It decreases
                  with the increase in I/O buffer hit rate for read requests,
                  write buffer size for write requests and number of server
                  nodes in the parallel file system, while higher I/O requests
                  arrival rate increases I/O response time.}
}

@Article{gropp:io-redundancy,
  author       = {William D. Gropp and Robert Ross and Neill Miller},
  title        = {Providing efficient {I/O} redundancy in {MPI} environments},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {11th European Parallel Virtual Machine and Message Passing
                  Interface Users Group Meeting; September 19-22, 2004;
                  Budapest, HUNGARY},
  editor       = {Kranzlm{\"u}ller, D. and Kacsuk, P. and Dongarra, J.},
  year         = {2004},
  month        = nov,
  volume       = {3241},
  pages        = {77--86},
  institution  = {Argonne Natl Lab, Div Math \& Comp Sci, 9700 S Cass Ave,
                  Argonne, IL 60439 USA; Argonne Natl Lab, Div Math \& Comp
                  Sci, Argonne, IL 60439 USA},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://www.springerlink.com/link.asp?id=wxx7xg3hb3xftx8b},
  keywords     = {fault-tolerance, single-disk failures, MPI-IO, pario-bib},
  abstract     = {Highly parallel applications often use either highly
                  parallel file systems or large numbers of independent disks.
                  Either approach can provide the high data rates necessary
                  for parallel applications. However, the failure of a single
                  disk or server can render the data useless. Conventional
                  techniques, such as those based on applying erasure
                  correcting codes to each file write, are prohibitively
                  expensive for massively parallel scientific applications
                  because of the granularity of access at which the codes are
                  applied. In this paper we demonstrate a scalable method for
                  recovering from single disk failures that is optimized for
                  typical scientific data sets. This approach exploits
                  coarser-grained (but precise) semantics to reduce the
                  overhead of constructing recovery data and makes use of
                  parallel computation (proportional to the data size and
                  independent of number of processors) to construct data.
                  Experiments are presented showing the efficiency of this
                  approach on a cluster with independent disks, and a
                  technique is described for hiding the creation of redundant
                  data within the MPI-IO implementation.}
}

@InProceedings{isaila:integrating,
  author       = {Florin Isaila and Guido Malpohl and Vlad Olaru and Gabor
                  Szeder and Walter Tichy},
  title        = {Integrating collective {I/O} and cooperative caching into
                  the ``clusterfile'' parallel file system},
  booktitle    = {Proceedings of the 18th Annual International Conference on
                  Supercomputing},
  year         = {2004},
  month        = jul,
  pages        = {58--67},
  publisher    = {ACM Press},
  copyright    = {(c)2004 Elsevier Engineering Information, Inc.},
  address      = {Saint-Malo, France},
  URL          = {http://doi.acm.org/10.1145/1006209.1006219},
  doi          = {10.1145/1006209.1006219},
  keywords     = {disk-directed I/O, two-phase I/O, clusterfile parallel file
                  system, cooperative cache, pario-bib},
  abstract     = {This paper presents the integration of two collective I/O
                  techniques into the Clusterfile parallel file system:
                  disk-directed I/O and two-phase I/O. We show that global
                  cooperative cache management improves the collective I/O
                  performance. The solution focuses on integrating disk
                  parallelism with other types of parallelism: memory (by
                  buffering and caching on several nodes), network (by
                  parallel I/O scheduling strategies) and processors (by
                  redistributing the I/O related computation over several
                  nodes). The performance results show considerable throughput
                  increases over ROMIO's extended two-phase I/O.}
}

@Article{krammer:marmot,
  author       = {Bettina Krammer and Matthias S. M{\"u}ller and Michael M.
                  Resch},
  title        = {{MPI I/O} analysis and error detection with {MARMOT}},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {Proceedings of the 11th European Parallel Virtual Machine
                  and Message Passing Interface Users Group Meeting},
  editor       = {Kranzlm{\"u}ller, D. and Kacsuk, P. and Dongarra, J.},
  year         = {2004},
  month        = sep,
  volume       = {3241},
  pages        = {242--250},
  institution  = {Ctr High Performance Comp, Allmandring 30, D-70550
                  Stuttgart, Germany; Ctr High Performance Comp, D-70550
                  Stuttgart, Germany},
  publisher    = {Springer-Verlag Berlin},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  address      = {Budapest, Hungary},
  URL          = {http://springerlink.metapress.com/link.asp?id=up8fqm0vlua6pjgl},
  keywords     = {MPI I/O, error detection, performance analysis, MARMOT,
                  pario-bib},
  abstract     = {The most frequently used part of MPI-2 is MPI I/O. Due to
                  the complexity of parallel programming in general, and of
                  handling parallel I/O in particular, there is a need for
                  tools that support the application development process.
                  There are many situations where incorrect usage of MPI by
                  the application programmer can be automatically detected. In
                  this paper we describe the MARMOT tool that uncovers some of
                  these errors and we also analyze to what extent it is
                  possible to do so for MPI I/O.}
}

@Article{latham:mpi-io-scalability,
  author       = {Rob Latham and Rob Ross and Rajeev Thakur},
  title        = {The impact of file systems on {MPI-IO} scalability},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {11th European Parallel Virtual Machine and Message Passing
                  Interface Users Group Meeting; September 19-22, 2004;
                  Budapest, HUNGARY},
  editor       = {Kranzlm{\"u}ller, D. and Kacsuk, P. and Dongarra, J.},
  year         = {2004},
  month        = nov,
  volume       = {3241},
  pages        = {87--96},
  institution  = {Argonne Natl Lab, 9700 S Cass Ave, Argonne, IL 60439 USA;
                  Argonne Natl Lab, Argonne, IL 60439 USA},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://www.springerlink.com/link.asp?id=m31px2lt90296b62},
  keywords     = {scalability analysis, MPI-IO, pario-bib},
  abstract     = {As the number of nodes in cluster systems continues to grow,
                  leveraging scalable algorithms in all aspects of such
                  systems becomes key to maintaining performance. While
                  scalable algorithms have been applied successfully in some
                  areas of parallel I/O, many operations are still performed
                  in an uncoordinated manner. In this work we consider, in
                  three file system scenarios, the possibilities for applying
                  scalable algorithms to the many operations that make up the
                  MPI-IO interface. From this evaluation we extract a set of
                  file system characteristics that aid in developing scalable
                  MPI-IO implementations.}
}

@Article{pinkenburg:tpo++,
  author       = {Simon Pinkenburg and Wolfgang Rosenstiel},
  title        = {Parallel {I/O} in an object-oriented message-passing
                  library},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {11th European Parallel Virtual Machine and Message Passing
                  Interface Users Group Meeting; September 19-22, 2004;
                  Budapest, HUNGARY},
  editor       = {Kranzlm{\"u}ller, D. and Kacsuk, P. and Dongarra, J.},
  year         = {2004},
  month        = nov,
  volume       = {3241},
  pages        = {251--258},
  institution  = {Univ Tubingen, Dept Comp Engn, Wilhelm Schickard Inst
                  Informat, Sand 13, D-72076 Tubingen, Germany; Univ Tubingen,
                  Dept Comp Engn, Wilhelm Schickard Inst Informat, D-72076
                  Tubingen, Germany},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://springerlink.metapress.com/link.asp?id=91qfhjbyrbgb7mhw},
  keywords     = {object-oriented message passing, TPO++, parallel I/O
                  interface, pario-bib},
  abstract     = {The article describes the design and implementation of
                  parallel I/O in the object-oriented message-passing library
                  TPO++. TPO++ is implemented on top of the message passing
                  standard MPI and provides an object-oriented, type-safe and
                  data centric interface to message-passing. Starting with
                  version 2, the MPI standard defines primitives for parallel
                  I/O called MPI-IO. Based on this layer, we have implemented
                  an object-oriented parallel I/O interface in TPO++. The
                  project is part of our efforts to apply object-oriented
                  methods to the development of parallel physical simulations.
                  We give a short introduction to our message-passing library
                  and detail its extension to parallel I/O. Performance
                  measurements between TPO++ and MPI are compared and
                  discussed.}
}

@Article{rajasekaran:out-of-core,
  author       = {Sanguthevar Rajasekaran},
  title        = {Out-of-core computing on mesh connected computers},
  journal      = {Journal of Parallel and Distributed Computing},
  year         = {2004},
  month        = nov,
  volume       = {64},
  number       = {11},
  pages        = {1311--1317},
  institution  = {Univ Connecticut, Dept CSE, 371 Fairfield Rd, ITEB 257,
                  Storrs, CT 06269 USA; Univ Connecticut, Dept CSE, Storrs, CT
                  06269 USA},
  publisher    = {Academic Press Inc. Elsevier Science},
  copyright    = {(c)2004 Elsevier Engineering Information, Inc.; The Thomson
                  Corporation},
  URL          = {http://dx.doi.org/10.1016/j.jpdc.2004.08.003},
  doi          = {10.1016/j.jpdc.2004.08.003},
  keywords     = {out-of-core, sorting, parallel disk model, performance
                  analysis, pario-bib},
  abstract     = {Several models of parallel disks are found in the
                  literature. These models have been proposed to alleviate the
                  I/O bottleneck arising in handling voluminous data. These
                  models have the general theme of assuming multiple disks.
                  For instance the parallel disks model assumes D disks and a
                  single computer. It is also assumed that a block of data
                  from each of the D disks can be fetched into the main memory
                  in one parallel I/O operation. In this paper, we study a
                  model where there are more than one processors and each
                  processor has an associated disk. In addition to the I/O
                  cost, one also has to account for the inter-processor
                  communication costs. To begin with we study the mesh and we
                  investigate the performance of the mesh with respect to
                  out-of-core computing. As a case study we consider the
                  problem of sorting. The goal of this paper is to study the
                  properties of this model. (C) 2004 Elsevier Inc. All rights
                  reserved. (27 Refs.)}
}

% NOTE(review): the journal and howpublished fields of the next entry are not
% among the fields the standard inproceedings formatting prints; they look
% like database-export residue and are retained as annotations only. Confirm
% before removing them.
@InProceedings{shah:algorithms,
  author       = {Rahul Shah and Peter J. Varman and Jeffrey Scott Vitter},
  title        = {Online algorithms for prefetching and caching on parallel
                  disks},
  journal      = {Annual ACM Symposium on Parallel Algorithms and
                  Architectures},
  booktitle    = {Proceedings of the Sixteenth Symposium on Parallel
                  Algorithms and Architectures},
  year         = {2004},
  month        = jun,
  volume       = {16},
  pages        = {255--264},
  copyright    = {(c)2004 Elsevier Engineering Information, Inc.},
  howpublished = {SPAA 2004 - Sixteenth Annual ACM Symposium on Parallelism in
                  Algorithms and Architectures; 2004; v.16; p.255-264},
  address      = {Barcelona, Spain},
  URL          = {http://doi.acm.org/10.1145/1007912.1007950},
  doi          = {10.1145/1007912.1007950},
  keywords     = {online algorithms, prefetching, caching, parallel disk
                  model, threshold LRU, pario-bib},
  abstract     = {Parallel disks provide a cost effective way of speeding up
                  I/Os in applications that work with large amounts of data.
                  The main challenge is to achieve as much parallelism as
                  possible, using prefetching to avoid bottlenecks in disk
                  access. Efficient algorithms have been developed for some
                  particular patterns of accessing the disk blocks. In this
                  paper, we consider general request sequences. When the
                  request sequence consists of unique block requests, the
                  problem is called prefetching and is a well-solved problem
                  for arbitrary request sequences. When the reference sequence
                  can have repeated references to the same block, we need to
                  devise an effective caching policy as well. While optimum
                  offline algorithms have been recently designed for the
                  problem, in the online case, no effective algorithm was
                  previously known. Our main contribution is a deterministic
                  online algorithm threshold-LRU which achieves
                  $O((MD/L)^{2/3})$ competitive ratio and a randomized online
                  algorithm threshold-MARK which achieves
                  $O(\sqrt{MD/L} \log(MD/L))$ competitive ratio for the
                  caching/prefetching problem on the parallel disk model
                  (PDM), where D is the number of disks, M is the size of fast
                  memory buffer, and M + L is the amount of lookahead
                  available in the request sequence. The best-known lower
                  bound on the competitive ratio is $\Omega(\sqrt{MD/L})$ for
                  lookahead $L \geq M$ in both models. We also show that if
                  the deterministic online algorithm is allowed to have twice
                  the memory of the offline then a tight competitive ratio of
                  $\Theta(\sqrt{MD/L})$ can be achieved. This problem
                  generalizes the well-known paging problem on a single disk
                  to the parallel disk model.}
}

@Article{shen:dpfs,
  author       = {Xiaohui H. Shen and Alok Choudhary},
  title        = {A high-performance distributed parallel file system for
                  data-intensive computations},
  journal      = {Journal of Parallel and Distributed Computing},
  year         = {2004},
  month        = sep,
  volume       = {64},
  number       = {10},
  pages        = {1157--1167},
  institution  = {Northwestern Univ, Dept Elect \& Comp Engn, Ctr Parallel \&
                  Distributed Comp, Evanston, IL 60208 USA; Northwestern Univ,
                  Dept Elect \& Comp Engn, Ctr Parallel \& Distributed Comp,
                  Evanston, IL 60208 USA},
  publisher    = {Academic Press Inc. Elsevier Science},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://dx.doi.org/10.1016/j.jpdc.2004.07.001},
  doi          = {10.1016/j.jpdc.2004.07.001},
  keywords     = {distributed file system, parallel file system, striping,
                  pario-bib},
  abstract     = {One of the challenges brought by large-scale scientific
                  applications is how to avoid remote storage access by
                  collectively using sufficient local storage resources to
                  hold huge amounts of data generated by the simulation while
                  providing high-performance I/O. DPFS, a distributed parallel
                  file system, is designed and implemented to address this
                  problem. DPFS collects locally distributed and unused
                  storage resources as a supplement to the internal storage of
                  parallel computing systems to satisfy the storage capacity
                  requirement of large-scale applications. In addition, like
                  parallel file systems, DPFS provides striping mechanisms
                  that divide a file into small pieces and distributes them
                  across multiple storage devices for parallel data access.
                  The unique feature of DPFS is that it provides three file
                  levels with each file level corresponding to a file striping
                  method. In addition to the traditional linear striping
                  method, DPFS also provides a novel Multidimensional striping
                  method that can solve performance problems of linear
                  striping for many popular access patterns. Other issues such
                  as load-balancing and user interface are also addressed in
                  DPFS. (C) 2004 Elsevier Inc. All rights reserved.}
}

@Article{sun:dynamic,
  author       = {Weitao T. Sun and Jiwu W. Shu and Weimin M. Zheng},
  title        = {Dynamic file allocation in Storage Area Networks with neural
                  network prediction},
  journal      = {Lecture Notes in Computer Science},
  booktitle    = {International Symposium on Neural Networks (ISNN 2004);
                  August 19-21, 2004; Dalian, PEOPLES R CHINA},
  editor       = {Yin, F. L. and Wang, J. and Guo, C. G.},
  year         = {2004},
  month        = jun,
  volume       = {3174},
  pages        = {719--724},
  institution  = {Tsing Hua Univ, Dept Comp Sci \& Technol, Beijing 100084,
                  Peoples R China},
  publisher    = {Springer-Verlag Heidelberg},
  copyright    = {(c)2004 Institute for Scientific Information, Inc.},
  URL          = {http://www.springerlink.com/link.asp?id=7t97qycr7awnbw6j},
  keywords     = {SAN, dynamic data reorganization, neural network, access
                  pattern prediction, pario-bib},
  abstract     = {Disk arrays are widely used in Storage Area Networks (SANs)
                  to achieve mass storage capacity and high level I/O
                  parallelism. Data partitioning and distribution among the
                  disks is a promising approach to minimize the file access
                  time and balance the I/O workload. But disk I/O parallelism
                  by itself does not guarantee the optimal performance of an
                  application. The disk access rates fluctuate with time
                  because of access pattern variations, which leads to a
                  workload imbalance. The user access pattern prediction is of
                  great importance to dynamic data reorganization between hot
                  and cool disks. Data migration occurs according to current
                  and future disk allocation states and access frequencies.
                  The objective of this paper is to develop a neural network
                  based disk allocation trend prediction method and optimize
                  the disks' file capacity to their balanced level. A
                  Levenberg-Marquardt neural network was adopted to predict
                  the disk access frequencies with the I/O track history. Data
                  reorganization on disk arrays was optimized to provide a
                  good workload balance. The simulation results proved that
                  the proposed method performs well.}
}