%%% %%% Dimensionality Reduction of Non-Linear Manifolds %%% %%% Hany Farid and Andrew Pierce %%% Dartmouth College %%% %%% 12.1.00 %%% http://www.cs.dartmouth.edu/farid %%% clear; Npoints = 1500; % NUMBER OF DATA POINTS Sub = 10; % USE Npoints/Sub TO BUILD MANIFOLD %%% EXAMPLE: 2D PARABOLIC CYLINDER %%% x = 2 * (rand( 1, Npoints) - 0.5); y = 2 * (rand( 1, Npoints) - 0.5); z = 1.5*x.^2; %%% SCALE AND MAKE ZERO-MEAN %%% x = x / max(abs(x)); y = y / max(abs(y)); x = x - mean(x); y = y - mean(y); z = z - mean(z); noise = 0.1*randn(7,Npoints); P = [x;y;z;noise]'; % EMBED IN 10-D SPACE %%% COMPUTE DIMENSIONALITY USING PCA %%% fprintf( 'computing dimensionality using PCA... ' ); [u,s,v] = svd( P' * P ); % EIGEN DECOMPOSITION OF COVARIANCE diags = diag( s ); diags = diags / max(diags); % NORMALIZED EIGENVALUES ratio = diags / sum(diags); ind = find( ratio>0.06 ); % LINEAR DIMENSIONALIY dim = length( ind ); fprintf( 'dim = %d\n', dim ); %%% SHOW DATA %%% subplot( 2,2,1 ); cla; plot3( x, y, z, '.' ); box on; axis square; axis( [-1.1 1.1 -1.1 1.1 -1.1 1.1] ); title( sprintf('input data (dim=%d)', dim) ); set( gca, 'Xtick', [-1 0 1], 'Ytick', [-1 0 1] ); subplot( 2,2,3 ); cla; stem( diags ); hold on; plot( diags, 'r' ); hold off; axis square; axis( [0.5 size(P,2)+0.5 0 1.1] ); title( sprintf('eigenvalues = %.2f | %.2f | %.2f', diags(1:3)) ); pause( 0.1 ); %%% BUILD MANIFOLD FROM A RANDOM SUBSET OF POINTS %%% fprintf( 'building manifold...\n' ); ind = randperm( Npoints ); ind = ind( 1:Npoints/Sub ); Q = P( ind, : ); BIGNUM = 9999; E = [-1 -1]; c = 1; N = length(Q); for k = 1 : Npoints R = ones( N,1 ) * P(k,:); dist = (R-Q).^2; dist = sum( dist' ); [m,i] = min( dist ); dist(i) = BIGNUM; [m,j] = min( dist ); if( ~ismember([i j], E, 'rows') & i~=j ) E(c,:) = [i j]; % CONNECT CLOSEST WITH SECOND CLOSEST c = c + 1; end end Nedges = c-1; %%% SHOW MANIFOLD %%% subplot( 2,2,2 ); cla; hold on; for k = 1 : c-1 i = E(k,1); j = E(k,2); plot3([Q(i,1) Q(j,1)], [Q(i,2) Q(j,2)], [Q(i,3) Q(j,3)], 'r.-'); end hold off; axis( [-1.1 1.1 -1.1 1.1 -1.1 1.1] ); axis square; box on; set( gca, 'Xtick', [-1 0 1], 'Ytick', [-1 0 1] ); title( 'manifold' ); pause(0.1); %%% MANIFOLD DISTANCES (FLOYD'S ALGORITHM) %%% fprintf( 'computing manifold distances...\n' ); dim = length( Q ); D = BIGNUM * ones(dim); % DISTANCE MATRIX for k = 1:Nedges % INIT: DIRECT CONNECTIONS i = E(k,1); j = E(k,2); D(i,j) = sum( (Q(i,:) - Q(j,:)).^2 ); D(j,i) = D(i,j); end for k = 1:dim % INIT: DIAGONALS=0 D(k,k) = 0; end i = [1:dim]; for k = 1:dim % FLOYD'S ALGORITHM [O(n^3)] D(i,i) = min(D(i,i), D(i,k)*ones(1,dim)+ones(dim,1)*D(k,i)); end %%% CHECK IF MANIFOLD IS FULLY CONNECTED %%% ind = find(D==BIGNUM); if( ~isempty(ind) ) fprintf( 'WARNING: disconnected manifold\n' ); end ind = find( D(1,:) ~= BIGNUM ); D = D( ind, ind ); % REMOVE DISCONNECTED PARTS %%% METRIC MDS - COMPUTE INTRINSIC DIMENSIONALITY %%% fprintf( 'computing manifold dimensionality... ' ); A = -0.5*D.^2; dim = size( D,1 ); H = eye(dim) - 1/dim * ones(dim,1) * ones(1,dim); B = H * A * H; [u,d,v] = svd( B ); diags = diag( d ); diags = diags / max(diags); % NORMALIZED EIGENVALUES ratio = diags / sum(diags); % NON-LINEAR DIMENSIONALITY ind = find( ratio>0.06 ); % ARBITRARY THRESHOLD dim = length( ind ); fprintf( 'dim = %d\n', dim ); subplot( 2,2,4 ); cla; N = min( 10, size(D,1) ); stem( diags(1:N) ); hold on; plot( diags(1:N), 'r' ); hold off; axis( [0.5 N+0.5 0 1.1] ); axis square; title( sprintf('eigenvalues = %.2f | %.2f | %.2f', diags(1:3)) ); subplot(2,2,2); title( sprintf('input data (dim=%d)', dim) ); fprintf( 'dim = %d\n', dim );