@TechReport{Dartmouth:TR99-359,
author = {Bruce Randall Donald and Chris Bailey-Kellogg and John J. Kelley and Clifford Stein},
title = {{SAR by MS for Functional Genomics (Structure-Activity Relation by Mass Spectrometry)}},
institution = {Dartmouth College, Computer Science},
address = {Hanover, NH},
number = {PCS-TR99-359},
year = {1999},
month = {October},
URL = {http://www.cs.dartmouth.edu/reports/TR99-359.ps.Z},
comment = {
This report is superceded by
TR2000-362
.
},
abstract = {
Large-scale functional genomics will require fast,
high-throughput experimental techniques, coupled with sophisticated
computer algorithms for data analysis and experiment planning. In
this paper, we introduce a combined experimental-computational
protocol called Structure-Activity Relation by Mass Spectrometry
(SAR by MS), which can be used to elucidate the function of
protein-DNA or protein-protein complexes. We present algorithms for
SAR by MS and analyze their complexity. Carefully-designed
Matrix-Assisted Laser Desorption/Ionization Time-Of-Flight (MALDI TOF)
and Electrospray Ionization (ESI) assays require only femtomolar
samples, take only microseconds per spectrum to record, enjoy a
resolution of up to one dalton in $10^6$, and (in the case of MALDI)
can operate on protein complexes up to a megadalton in mass. Hence,
the technique is attractive for high-throughput functional genomics.
In SAR by MS, selected residues or nucleosides are 2H-, 13C-,
and/or 15N-labeled. Second, the complex is crosslinked. Third, the
complex is cleaved with proteases and/or endonucleases. Depending on
the binding mode, some cleavage sites will be shielded by the
crosslinking. Finally, a mass spectrum of the resulting fragments is
obtained and analyzed. The last step is the Data Analysis
phase, in which the mass signatures are interpreted to obtain
constraints on the functional binding mode. Experiment Planning
entails deciding what labeling strategy and cleaving agents to employ,
so as to minimize mass degeneracy and spectral overlap, in order that
the constraints derived in data analysis yield a small number of
binding hypotheses.
A number of combinatorial and algorithmic questions arise in deriving
algorithms for both Experiment Planning and Data Analysis. We explore
the complexity of these problems, obtaining upper and lower bounds.
Experimental results are reported from an implementation of our
algorithms.
}
}