@TechReport{Dartmouth:TR2005-530, author = {Ramgopal R. Mettu and Ryan H. Lilien and Bruce R. Donald}, title = {{High-Throughput Inference of Protein-Protein Interaction Sites from Unassigned NMR Data by Analyzing Arrangements Induced By Quadratic Forms on 3-Manifolds}}, institution = {Dartmouth College, Computer Science}, address = {Hanover, NH}, number = {TR2005-530}, year = {2005}, month = {January}, URL = {http://www.cs.dartmouth.edu/reports/TR2005-530.pdf}, comment = { A revised version of this paper has been accepted for publication and will appear at ISMB (2005) and Bioinformatics (2005). }, abstract = { We cast the problem of identifying protein-protein interfaces, using only unassigned NMR spectra, into a geometric clustering problem. Identifying protein-protein interfaces is critical to understanding inter- and intra-cellular communication, and NMR allows the study of protein interaction in solution. However it is often the case that NMR studies of a protein complex are very time-consuming, mainly due to the bottleneck in assigning the chemical shifts, even if the apo structures of the constituent proteins are known. We study whether it is possible, in a high-throughput manner, to identify the interface region of a protein complex using only unassigned chemical shift and residual dipolar coupling (RDC) data. We introduce a geometric optimization problem where we must cluster the cells in an arrangement on the boundary of a 3-manifold. The arrangement is induced by a spherical quadratic form, which in turn is parameterized by SO(3)xR^2. We show that this formalism derives directly from the physics of RDCs. We present an optimal algorithm for this problem that runs in O(n^3 log n) time for an n-residue protein. We then use this clustering algorithm as a subroutine in a practical algorithm for identifying the interface region of a protein complex from unassigned NMR data. We present the results of our algorithm on NMR data for 7 proteins from 5 protein complexes and show that our approach is useful for high-throughput applications in which we seek to rapidly identify the interface region of a protein complex. } }