% Parallel-I/O bibliography entries for papers published in Lecture Notes in
% Computer Science during 2004.  Following this file's convention, each paper
% is an Article-type entry whose journal is the LNCS series; the booktitle and
% editor fields record the originating conference.  Author accents use the
% brace-wrapped "special character" form so labels and sorting work.

% ICCS 2004 (LNCS 3038): I/O library and cost model for BSML.
@Article{gava:parallel-ml,
  author      = {Fr{\'e}d{\'e}ric Gava},
  title       = {Parallel {I/O} in bulk-synchronous parallel {ML}},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {4th International Conference on Computational Science (ICCS 2004); June 6-9, 2004; Krakow, POLAND},
  editor      = {Bubak, M. and van Albada, G. D. and Sloot, P. M. A. and Dongarra, J. J.},
  year        = {2004},
  month       = jun,
  volume      = {3038},
  pages       = {331--338},
  institution = {Univ Paris 12, LACL, Creteil, France},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3038&spage=331},
  keywords    = {parallel I/O, parallel ML, BSML, data parallel language, pario-bib},
  abstract    = {Bulk Synchronous Parallel ML or BSML is a functional
                 data-parallel language for programming bulk synchronous
                 parallel (BSP) algorithms. The execution time can be
                 estimated and dead-locks and indeterminism are avoided. For
                 large scale applications where parallel processing is
                 helpful and where the total amount of data often exceeds the
                 total main memory available, parallel disk I/O becomes a
                 necessity. We present here a library of I/O features for
                 BSML and its cost model.}
}

% ICCS 2004 (LNCS 3036): cooperative cache for PVFS (Coopc-PVFS).
@Article{hwang:pvfs-cache,
  author      = {In-Chul Hwang and Hojoong Kim and Hanjo Jung and Dong-Hwan Kim and Hojin Ghim and Seung-Ryoul Maeng and Jung-Wan Cho},
  title       = {Design and implementation of the cooperative cache for {PVFS}},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {4th International Conference on Computational Science (ICCS 2004); June 6-9, 2004; Krakow, POLAND},
  editor      = {Bubak, M. and van Albada, G. D. and Sloot, P. M. A. and Dongarra, J. J.},
  year        = {2004},
  month       = jun,
  volume      = {3036},
  pages       = {43--50},
  institution = {Korea Adv Inst Sci \& Technol, Dept Elect Engn \& Comp Sci, Div Comp Sci, 373-1 Kusung Dong, Taejon 305701, South Korea; Korea Adv Inst Sci \& Technol, Dept Elect Engn \& Comp Sci, Div Comp Sci, Taejon 305701, South Korea},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3036&spage=43},
  keywords    = {PVFS, cooperative cache, pario-bib},
  abstract    = {Recently, there have been many efforts to get high
                 performance in cluster computing with inexpensive PCs
                 connected through high-speed networks. Some of them were to
                 provide high bandwidth and parallelism in file service using
                 a distributed file system. Other researches for distributed
                 file systems include the cooperative cache that reduces
                 servers' load and improves overall performance. The
                 cooperative cache shares file caches among clients so that a
                 client can request a file to another client, not to the
                 server, through inter-client message passing. In various
                 distributed file systems, PVFS (Parallel Virtual File
                 System) provides high performance with parallel I/O in Linux
                 widely used in cluster computing. However, PVFS doesn't
                 support any file cache facility. This paper describes the
                 design and implementation of the cooperative cache for PVFS
                 (Coopc-PVFS). We show the efficiency of Coopc-PVFS in
                 comparison to original PVFS. As a result, the response time
                 of Coopc-PVFS is shorter than or similar to that of original
                 PVFS.}
}

% 1st European Across Grids Conference (LNCS 2970): striping combined with
% replication across the Grid.
@Article{marco:raid1,
  author      = {R. Marco and J. Marco and D. Rodriguez and D. Cano and I. Cabrillo},
  title       = {{RAID-1} and data stripping across the {GRID}},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {1st European Across Grids Conference; February 13-14, 2003; Santiago de Compostela, SPAIN},
  editor      = {Rivera, F. F. and Bubak, M. and Tato, A. G. and Doallo, R.},
  year        = {2004},
  month       = mar,
  volume      = {2970},
  pages       = {119--123},
  institution = {Univ Cantabria, CSIC, Inst Fis Cantabria, Avda Los Castros S-N, E-39005 Santander, Spain; Univ Cantabria, CSIC, Inst Fis Cantabria, E-39005 Santander, Spain},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=2970&spage=119},
  keywords    = {RAID, RAID-1, data striping, GRID, pario-bib},
  abstract    = {Stripping techniques combined with an adequate replication
                 policy across the Grid offer the possibility to improve
                 significatively data access and processing times, while
                 eliminating the need for local data mirroring, so saving
                 significatively on storage costs. First results on a local
                 cluster following a simple strategy are presented.}
}

% 1st European Across Grids Conference (LNCS 2970): GridExpand parallel I/O
% middleware.
@Article{menor:grid-io,
  author      = {Jos{\'e} M. P{\'e}rez Menor and F{\'e}lix Garc{\'\i}a and Jes{\'u}s Carretero and Alejandro Calder{\'o}n and Javier Fern{\'a}ndez and Jos{\'e} Daniel Garc{\'\i}a},
  title       = {A parallel {I/O} middleware to integrate heterogeneous storage resources on grids},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {1st European Across Grids Conference; February 13-14, 2003; Santiago de Compostela, SPAIN},
  editor      = {Rivera, F. F. and Bubak, M. and Tato, A. G. and Doallo, R.},
  year        = {2004},
  month       = mar,
  volume      = {2970},
  pages       = {124--131},
  institution = {Univ Carlos III Madrid, Comp Architecture Grp, Dept Comp Sci, Madrid, Spain},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=2970&spage=124},
  keywords    = {data grids, parallel I/O, data declustering, pario-bib},
  abstract    = {The philosophy behind grid is to use idle resources to
                 achieve a higher level of computational services
                 (computation, storage, etc). Existing data grids solutions
                 are based in new servers, specific APIs and protocols,
                 however this approach is not a realistic solution for
                 enterprises and universities, because this supposes the
                 deployment of new data servers across the company. This
                 paper describes a new approach to data access in
                 computational grids. This approach is called GridExpand, a
                 parallel I/O middleware that integrates heterogeneous data
                 storage resources in grids. The proposed grid solution
                 integrates available data network solutions (NFS, CIFS,
                 WebDAV) and makes possible the access to a global grid file
                 system. Our solution differs from others because it does not
                 need the installation of new data servers with new
                 protocols. Most of the data grid solutions use replication
                 as the way to obtain high performance. Replication, however,
                 introduce consistency problem for many collaborative
                 applications, and sometimes requires the usage of lots of
                 resources. To obtain high performance, we apply the parallel
                 I/O techniques used in parallel file systems.}
}

% ICCS 2004 (LNCS 3038): hint-based I/O optimizations in MAPFS.
@Article{perez:hints,
  author      = {Mar{\'\i}a S. P{\'e}rez and Albert S{\'a}nchez and V{\'\i}ctor Robles and Jos{\'e} Pe{\~n}a and Fernando P{\'e}rez},
  title       = {Optimizations based on hints in a parallel file system},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {4th International Conference on Computational Science (ICCS 2004); June 6-9, 2004; Krakow, POLAND},
  editor      = {Bubak, M. and van Albada, G. D. and Sloot, P. M. A. and Dongarra, J. J.},
  year        = {2004},
  month       = jun,
  volume      = {3038},
  pages       = {347--354},
  institution = {Univ Politecn Madrid, DATSI FI, E-28040 Madrid, Spain},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3038&spage=347},
  keywords    = {parallel I/O, optimizations, caching, prefetching, hints, pario-bib},
  abstract    = {Existing parallel file systems provide applications a little
                 control for optimizing I/O accesses. Most of these systems
                 use optimization techniques transparent to the applications,
                 limiting the performance achieved by these solutions.
                 Furthermore, there is a big gap between the interface
                 provided by parallel file systems and the needs of
                 applications. In fact, most of the parallel file systems do
                 not use intuitive I/O hints or other optimizations
                 approaches. In this sense, applications programmers cannot
                 take advantage of optimization techniques suitable for the
                 application domain. This paper describes I/O optimizations
                 techniques used in MAPFS, a multiagent I/O architecture.
                 These techniques are configured by means of a double
                 interface for specifying access patterns or hints that
                 increase the performance of I/O operations. An example of
                 this interface is shown.}
}

% ICCS 2004 (LNCS 3038): DMA aligned buffer (DAB) for software RAID.
@Article{shi:dma-raid,
  author      = {Zhan Shi and Jiangling Zhang and Xinrong Zhou},
  title       = {Using {DMA} aligned buffer to improve software {RAID} performance},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {4th International Conference on Computational Science (ICCS 2004); June 6-9, 2004; Krakow, POLAND},
  editor      = {Bubak, M. and van Albada, G. D. and Sloot, P. M. A. and Dongarra, J. J.},
  year        = {2004},
  month       = jun,
  volume      = {3038},
  pages       = {355--362},
  institution = {Huazhong Univ Sci \& Technol, Dept Comp Sci, Natl Storage Lab, Wuhan 430074, Peoples R China; Abo Akad Univ, Turku Ctr Comp Sci, FIN-20520 Turku, Finland},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3038&spage=355},
  keywords    = {DMA, software RAID, performance, DMA aligned buffer, DAB, pario-bib},
  abstract    = {While the storage market grows rapidly, software RAID, as a
                 low-cost solution, becomes more and more important nowadays.
                 However the performance of software RAID is greatly
                 constrained by its implementation. Varies methods have been
                 taken to improve its performance. By integrating a novel
                 buffer mechanism - DMA aligned buffer (DAB) into software
                 RAID kernel driver, we achieved a significant performance
                 improvement, especially on small I/O requests.}
}

% IEEE/CS symposium volume (LNCS 2965): data allocation and scheduling in
% disks and disk arrays.
@Article{thomasian:allocation,
  author      = {Alexander Thomasian},
  title       = {Data allocation and scheduling in disks and disk arrays},
  journal     = {Lecture Notes in Computer Science},
  booktitle   = {IEEE/CS Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems; October 12, 2003; Orlando, FL},
  editor      = {Calzarossa, M. C. and Gelenbe, E.},
  year        = {2004},
  month       = apr,
  volume      = {2965},
  pages       = {357--384},
  institution = {New Jersey Inst Technol, Dept Comp Sci, Newark, NJ 07102 USA},
  publisher   = {Springer-Verlag},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=2965&spage=357},
  keywords    = {data allocation, scheduling, disk arrays, pario-bib},
  abstract    = {Magnetic disks, which together with disk arrays constitute a
                 multibillion dollar industry, were developed in 1950s. Disks
                 were an advance over magnetic drums, which had a dedicated
                 read/write head per track, since much higher amounts of data
                 could be accessed in a cost effective manner due to the
                 sharability of the movable read/write heads. DRAM memories,
                 which are volatile, were projected to replace disks a decade
                 ago (see Section 2.4 in [33]). This did not materialize due
                 to the inherent volatility of DRAM, i.e., a power source is
                 required to ensure that DRAM contents are not lost, but also
                 due to recent dramatic increases in areal recording density
                 and hence disk capacity, which is estimated at 60\% compound
                 annual growth rate - CAGR. This has resulted in a rapid
                 decrease in cost per megabyte of disk capacity, so that it
                 is lower than DRAM by a factor of 1000 to one.}
}