% Informativeness measures applied to each kernel matrix, stored as a
% column cell array of function handles. Entry k is labelled func_names{k}.
% The last two measures are divided by the squared number of rows of the
% matrix they are evaluated on.
info_func={
    @matrix_info_fro;                          % 1
    @matrix_info_cosine;                       % 2
    @matrix_info_hsic;                         % 3
    @matrix_info_cka;                          % 4
    @matrix_info_chernoff;                     % 5
    @quantum_hellinger_info;                   % 6
    @bures_lb_matrix_info;                     % 7
    @bures_super_lb_matrix_info;               % 8
    @(K) matrix_info_lawley(K)/size(K,1)^2;    % 9
    @(K) matrix_info_bartlett(K)/size(K,1)^2   % 10
    };

% Display names (row cell array), in the same order as info_func.
func_names={'Euclidean','Cosine','HSIC','CKA','Chernoff', ...
    'QH','Bures','Sub-Bures','Lawley','Bartlett'};

% Data set names (column cell array); the experiment loop passes
% datasets{exp_num} to generate_2D_cluster_data_set.
datasets={'wedges';'boxes';'rings';'moons';'spirals';'blobs'};

% Seed the random number generator so the runs are repeatable.
rng(880);

n=500;       % number of points requested per generated data set
Nsigma=15;   % number of kernel bandwidths in the sweep
nmonte=10;   % number of Monte Carlo repetitions per data set

% Bandwidth grid: Nsigma logarithmically spaced values from sig_min to
% sig_max, stored as a column vector.
sig_min=0.05;
sig_max=20;
sigmas=logspace(log10(sig_min),log10(sig_max),Nsigma).';

% One (initially empty) result slot per entry of datasets.
allresults=cell(size(datasets));

% Silence the eigs convergence warnings that poor bandwidth choices
% trigger. The previous state of BOTH identifiers is recorded (as a struct
% array) so that both are restored afterwards, not just the first one.
orig_state=[warning('off','MATLAB:eigs:NoEigsConverged');
    warning('off','MATLAB:eigs:NotAllEigsConverged')];

try
    kkk=1;
    for exp_num=[6 1 4 3]
        % Per-data-set accumulators:
        %   stats      - clustering score per (bandwidth, run)
        %   cka_sup    - alignment of the centred kernel with the centred label matrix
        %   info_stats - each informativeness measure per (bandwidth, run)
        %   best_stats - clustering score at the bandwidth each measure selects,
        %                plus one extra column for a fixed bandwidth index
        %   data_points- the sampled points and labels of every run
        stats=zeros(Nsigma,nmonte);
        cka_sup=zeros(Nsigma,nmonte);
        info_stats=zeros(Nsigma,nmonte,numel(info_func));
        best_stats=zeros(nmonte,numel(info_func)+1);
        data_points=zeros(nmonte,n,3);

        for monte_ii=1:nmonte
            if exp_num>0
                [x,l] = generate_2D_cluster_data_set(n,datasets{exp_num});
            else
                % NOTE(review): XX and Y are not defined in the visible part of
                % this script; this branch is never taken for the exp_num values
                % listed above.
                idx=randperm(numel(Y));
                x=XX(idx(1:n),:);
                l=Y(idx(1:n));
            end
            data_points(monte_ii,:,:)=[x,l];

            % Label co-membership matrix, double-centred and symmetrised.
            L=bsxfun(@eq,l,l.');
            kcluster=max(l);
            mu=mean(L);
            Lc=bsxfun(@minus,bsxfun(@minus,L,mu),mu')+mean(mu);
            Lc=(Lc+Lc')/2;

            % Squared pairwise Euclidean distances.
            D2= squareform(pdist(x)).^2;

            for sigma_ii=1:Nsigma
                % Gaussian kernel matrix for this bandwidth.
                A=exp(-D2/(2*sigmas(sigma_ii)^2));
                A=(A+A')/2;
                for jj=1:numel(info_func)
                    info_stats(sigma_ii,monte_ii,jj)= info_func{jj}(A);
                end

                % Normalised inner product between the double-centred kernel
                % and the double-centred label matrix.
                K=A;
                mu=mean(K);
                Kc=bsxfun(@minus,bsxfun(@minus,K,mu),mu')+mean(mu);
                Kc=(Kc+Kc')/2;
                cka_sup(sigma_ii,monte_ii)=Kc(:).'*Lc(:)/sqrt(sum(Kc(:).^2)*sum(Lc(:).^2));

                %spectral clustering algorithm by Ng Jordan Weiss
                A(1:size(A,1)+1:end)=0;   % zero the diagonal (no self-affinity)
                d=sum(A,2);
                d(d<1e-9)=1;              % avoid dividing by (near-)zero degrees

                % Symmetric degree normalisation D^(-1/2) * A * D^(-1/2).
                K=bsxfun(@rdivide,bsxfun(@rdivide,A,sqrt(d)),sqrt(d'));
                K(1:size(K,1)+1:end)=0;
                K=(K+K.')/2;
                [Ua,S]=eigs(K,kcluster,'la');
                s=diag(S);[~,idx]=sort(s,'descend');
                Ua=Ua(:,idx);
                U=Ua(:,1:kcluster);
                % Scale each row of the embedding to unit length before k-means.
                U=bsxfun(@rdivide,U,sqrt(sum(U.^2,2)));
                cluster_idx=kmeans(U,kcluster);
                % presumably NMI between labels and clusters (per the plot
                % titles) - confirm against compare_clusterings
                stats(sigma_ii,monte_ii)=compare_clusterings(l,cluster_idx);
            end

            % Bandwidth index that maximises each measure for THIS run only,
            % followed by the fixed bandwidth index 4 as an extra column.
            [~,best_sigma]=max(info_stats(:,monte_ii,:),[],1);
            best_sigma=[best_sigma(:);4];
            for jj=1:numel(best_sigma)
                best_stats(monte_ii,jj)=stats(best_sigma(jj),monte_ii);
            end
        end


        allresults{kkk}={stats,info_stats,best_stats,cka_sup,data_points};
        kkk=kkk+1;
    end
    nDatasets=kkk-1;
catch run_err
    % Do not leave the warnings disabled if the experiment fails part-way.
    warning(orig_state);
    rethrow(run_err);
end
warning(orig_state);

%%
% Per-series plot styling. Each table is built with one entry per candidate
% style and then reduced/reordered with keep_idx so that entry k of every
% table describes the k-th plotted series.
markers={'o','<','v','>','d','s','^','p','none','x','+','o'};
linewidths=3*ones(numel(markers),1);
markersizes=11*ones(numel(markers),1);
markersizes(2)=12;


% Only '-', '--', ':' and '-.' are valid LineStyle values ('.-' is not).
linestyles={'-','--',':','-.','-','--',':','-.','-','--',':','-.'};

% RGB triplets on a 0-255 scale, rescaled to [0,1] and split into one cell
% per colour.
colors=[
    0 146 146;
    146 0 0 ;
    255 109 182;
    255 182 119;
    73 0 146;
    0 109 219;
    182 109 255;
    109 182 255;
    0 0 0;
    219 209 9;
    36 255 36;
    255 255 109;
    182 219 255;
    146 73 0;
    ]/255;
colors=mat2cell(colors,ones(size(colors,1),1),3);

% Select and order the styles actually used; every style table is indexed
% the same way so they stay aligned with each other.
keep_idx =[ 1     2     3     4     7     8    10    11  5 6];
colors=colors(keep_idx);
linewidths=linewidths(keep_idx);
markersizes=markersizes(keep_idx);
markers=markers(keep_idx);
linestyles=linestyles(keep_idx);


% Assemble Figure 8: one row per data set with four panels
% (input scatter, informativeness vs. bandwidth, clustering score vs.
% bandwidth, and a boxplot of the score at each measure's chosen bandwidth).
figure(8),clf
% Keep the figure's screen location but force its size to 837 x 500.
set(gcf,'position',get(gcf,'position').*[1 1 0 0]+ [0 0 837         500])
nColumns=6;

for ii=1:nDatasets
    exp_num=ii;
    stats=allresults{exp_num}{1};
    info_stats=allresults{exp_num}{2};
    best_stats=allresults{exp_num}{3};
    cka_sup=allresults{exp_num}{4};
    % Points and labels of the first Monte Carlo run only.
    x=squeeze(allresults{exp_num}{5}(1,:,1:2));
    l=squeeze(allresults{exp_num}{5}(1,:,3));
    %% Plot the actual data
    figure(8)
    subplot(nDatasets,nColumns,1+(ii-1)*nColumns)
    Dtemp=squareform(pdist(x));
    Dtemp(1:size(Dtemp,1)+1:end)=nan;   % ignore self-distances in min/max

    % Reference bandwidth derived from the smallest and largest pairwise
    % distances.
    md = 2*min(Dtemp(:))+2/9*(max(Dtemp(:))-2*min(Dtemp(:)));

    % One marker style per class label.
    for kk=1:max(l)
        plot(x(l==kk,2),x(l==kk,1),markers{kk})
        hold all
    end
    axis tight
    axis equal
    aa=axis;
    % Pad the axis limits by 10% of their range on every side.
    axis(aa+[-diff(aa(1:2))/10 diff(aa(1:2))/10 -diff(aa(3:4))/10 diff(aa(3:4))/10])
    % Report the number of points actually plotted instead of a fixed value.
    title(sprintf('Input (n=%d)',size(x,1)),'fontweight','normal')
    scale_x=1.4;
    scale_y=1.4;
    offset_x=-10;
    if ii==1
        % Panel letters, positioned in the data coordinates of this first
        % subplot (hand-tuned offsets).
        text(5*scale_x,max(x(:,1))*scale_y,'A','fontsize',16)
        text(59*scale_x+offset_x,max(x(:,1))*scale_y,'B','fontsize',16)
        text(86*scale_x+offset_x,max(x(:,1))*scale_y,'C','fontsize',16)
        text(132*scale_x+2*offset_x,max(x(:,1))*scale_y,'D','fontsize',16)
    end
    set(gca,'position',get(gca,'position')+[0.025 0 0 0])

    %% Plot the NMI versus bandwidth
    subplot(nDatasets,nColumns,4+(ii-1)*nColumns)
    % Median over runs as a line, individual runs as markers.
    semilogx(sigmas,median(stats(:,:),2),'-','linewidth',3,'color',[.2 .2 .9])
    hold all
    semilogx(sigmas,stats(:,:),'k+','color',[.9 .2 .2])
    if ii==nDatasets
        xlabel('Gaussian bandwidth')
    else
        set(gca,'xticklabel',[])
    end
    title('NMI(Cluster,Label)','fontweight','normal')
    set(gca,'xlim',[sigmas(1)*.75 sigmas(end)*1.5])
    set(gca,'xtick',[0.1 1 10])
    aa=axis;
    % Pad the y-limits by 5% of their range; the padded limits are reused
    % for the boxplot panel below.
    axis(aa+[0 0 -diff(aa(3:4))/20 diff(aa(3:4))/20])
    aa=axis;
    %% Plot informativeness versus bandwidth
    subplot(nDatasets,nColumns,(2:3)+(ii-1)*nColumns)
    % The y-values here are NaN, so nothing visible is drawn at md;
    % presumably kept to set up the log-x axes before hold.
    semilogx(md*[1;1],nan*[0;1],'k-','linewidth',2)
    hold all
    % First eight measures, first Monte Carlo run.
    hs=semilogx(sigmas,squeeze(info_stats(:,1,1:8)),'-o');
    for kk=1:numel(hs)
        set(hs(kk),'marker','none','color',colors{kk},...
            'markersize',markersizes(kk),'linewidth',linewidths(kk),...
            'linestyle',linestyles{kk});
    end
    set(gca,'xtick',[0.1 1 10])
    set(gca,'xlim',[sigmas(1)*.75 sigmas(end)*1.5])
    if ii==nDatasets
        xlabel('Gaussian bandwidth')
    else
        set(gca,'xticklabel',[])

    end
    title('Informativeness','fontweight','normal')
    legend(hs,func_names(1:8),'Location','westoutside')

    %% Plot the NMI for each informativeness measure as a boxplot
    subplot(nDatasets,nColumns,(5:6)+(ii-1)*nColumns)
    % The extra fixed-bandwidth column of best_stats is not shown.
    boxplot(best_stats(:,1:numel(info_func)))
    set(gca,'xtick',1:numel(info_func),'xticklabel',func_names,'XTickLabelRotation',30)
    set(gca,'ylim',aa(3:4))
    title('NMI(Cluster,Label)','fontweight','normal')
    set(gca,'xgrid','on')
    cc=get(gca,'position');
    set(gca,'position',[cc(1:3) cc(4)+.01])

end

set(gcf,'paperpositionmode','auto')
% saveas fails if the target folder is missing; create it first so the
% figure is not lost after the (long) experiment has finished.
out_dir='../results';
if ~exist(out_dir,'dir')
    mkdir(out_dir);
end
saveas(gcf,'../results/Figure_8_kernelsize_clustering_bandwidth','epsc2')