% Journal article by Ming Li and David Kotz, IJCNDS 2(4), 2009.
% The `copyright` and `earlier` fields are not standard BibTeX fields, so
% standard styles ignore them. NOTE(review): `earlier` presumably holds the
% citation key (li:quality) of an earlier version of this work -- confirm.
@article{li:ijcnds,
  author    = {Ming Li and David Kotz},
  title     = {Towards Collaborative Data Reduction in Stream-Processing Systems},
  journal   = {International Journal of Communication Networks and Distributed Systems (IJCNDS)},
  year      = {2009},
  volume    = {2},
  number    = {4},
  pages     = {375--400},
  publisher = {Inderscience},
  copyright = {Inderscience Enterprises Ltd.},
  earlier   = {li:quality},
  doi       = {10.1504/IJCNDS.2009.026555},
  url       = {http://www.cs.dartmouth.edu/~dfk/papers/internal/li-ijcnds.pdf},
  abstract  = {We consider a distributed system that disseminates high-volume event streams to many simultaneous monitoring applications over a low-bandwidth network. For bandwidth efficiency, we propose a collaborative data-reduction mechanism, ``group-aware stream filtering'', used together with multicast, to select a small set of necessary data that satisfy the needs of a group of subscribers simultaneously. We turn data-compressing filters into group-aware filters by exploiting two overlooked, yet important, properties of monitoring applications: 1)~many of them can tolerate some degree of ``slack'' in their data quality requirements, and 2)~there may exist multiple subsets of the source data satisfying the quality needs of an application. We can thus choose the ``best alternative'' subset for each application to maximize the data overlap within the group to best benefit from multicasting. We provide a general framework that treats the group-aware stream filtering problem completely; we prove the problem NP-hard and thus provide a suite of heuristic algorithms that ensure data quality (specifically, granularity and timeliness) while collaboratively reducing data. The framework is extensible and supports a diverse range of filters. Our prototype-based evaluation shows that group-aware stream filtering is effective in trading CPU time for data reduction, compared with self-interested filtering.}
}