% Journal article: Chiu, Liao and Kandemir, JPDC 64(3), March 2004.
% The institution (author affiliations), copyright, keywords and abstract
% fields are not standard for article entries; standard styles ignore them
% and they are kept purely as database metadata.
% The url value must stay on a single line: inside a field, a percent sign
% is a literal character, not a comment or line-continuation marker.
@article{chiu:smart-disks,
  author      = {Chiu, Steve and Liao, Wei-keng and Kandemir, Mahmut},
  title       = {Processor-embedded distributed smart disks for {I/O}-intensive workloads: architectures, performance models and evaluation},
  journal     = {Journal of Parallel and Distributed Computing},
  year        = {2004},
  month       = mar,
  volume      = {64},
  number      = {3},
  pages       = {427--445},
  institution = {Northwestern Univ, Dept Elect \& Comp Engn, Evanston, IL 60208 USA;
                 Northwestern Univ, Dept Elect \& Comp Engn, Evanston, IL 60208 USA;
                 Penn State Univ, Dept Comp Sci \& Engn, University Pk, PA 16802 USA},
  publisher   = {Academic Press},
  copyright   = {(c)2004 Institute for Scientific Information, Inc.},
  url         = {http://www.sciencedirect.com/science?_ob=ArticleURL&_udi=B6WKJ-4C2NCJB-2&_user=10&_handle=B-WA-A-A-AW-MsSAYVW-UUA-AUYDWYAVCD-AUYVYZWWCD-VAZYCWZWZ-AW-U&_fmt=summary&_coverDate=03%2F31%2F2004&_rdoc=10&_orig=browse&_srch=%23toc%236908%232004%23999359996%23491486!&_cdi=6908&view=c&_acct=C000050221&_version=1&_urlVersion=0&_userid=10&md5=1e1c7dff847a1da714e396fcd2a3d908},
  keywords    = {smart disks, active disks, I/O performance evaluation, analytic performance models, infiniband, data mining, data clustering, I/O architecture, pario-bib},
  abstract    = {Processor-embedded disks, or smart disks, with their network
                 interface controller, can in effect be viewed as processing
                 elements with on-disk memory and secondary storage. The data
                 sizes and access patterns of today's large I/O-intensive
                 workloads require architectures whose processing power scales
                 with increased storage capacity. To address this concern, we
                 propose and evaluate disk-based distributed smart storage
                 architectures. Based on analytically derived performance
                 models, our evaluation with representative workloads show that
                 offloading processing and performing point-to-point data
                 communication improve performance over centralized
                 architectures. Our results also demonstrate that distributed
                 smart disk systems exhibit desirable scalability and can
                 efficiently handle I/O-intensive workloads, such as commercial
                 decision support database (TPC-H) queries, association rules
                 mining, data clustering, and two-dimensional fast Fourier
                 transform, among others.},
}