function optimal_level = opt_level(zvec, Cmat)
% PURPOSE: Find optimal level of a partition tree based on fuzzy decision.
%-------------------------------------------------------------------------------
% USAGE: optimal_level = opt_level(zvec, Cmat)
% where: zvec = ascending vector of similarity values ((N + 1) x 1)
%               0 = zvec(1) < zvec(2) < ... < zvec(N) < zvec(N+1) = 1.
%               [Example] zvec = [0; 0.1; 0.5; 0.9; 1];
%        N    = # of variables (must be at least 2)
%        Cmat = matrix representing cluster structure at each level ((N+1) x N)
%               Cmat(i,j) represents how many clusters have j members
%               at level zvec(i).
%               [Example] Cmat = [0, 0, 0, 1;
%                                 0, 0, 0, 1;
%                                 0, 2, 0, 0;
%                                 2, 1, 0, 0;
%                                 4, 0, 0, 0];
%               two variables merge at 0.9, the other two
%               variables merge at 0.5, and then those two
%               clusters merge at 0.1.
%-------------------------------------------------------------------------------
% RETURNS: optimal_level = [optimal level based on max;
%                           optimal level based on power mean;
%                           optimal level based on arithmetic mean] (3 x 1)
%-------------------------------------------------------------------------------
% ERRORS:  An error is raised when
%            * Cmat has fewer than 2 columns or is not (N+1) x N,
%            * zvec is not a vector with N+1 elements,
%            * zvec(1) ~= 0, zvec(end) ~= 1, or zvec is not sorted,
%            * the first/last row of Cmat is not the one-cluster /
%              all-singletons configuration,
%            * a row of Cmat does not account for exactly N variables,
%            * row i of Cmat does not contain max(i-1, 1) clusters.
%-------------------------------------------------------------------------------
% NOTE: This code does not work when more than one pair merge at the same time.
%       We assume that only one pair merges at each step; the cluster count of
%       every row of Cmat is checked against that assumption.
%-------------------------------------------------------------------------------
% REFERENCE:
%   H. Yamashita and T. Takizawa (2010). Fuzzy Theory. Tokyo: Kyoritsu
%       Shuppan Co., Ltd. (in Japanese).
%
%   K. Motegi (2014). Fuzzy Cluster Analysis with Mixed Frequency Data.
%       Working Paper at Waseda University.
%-------------------------------------------------------------------------------
% Written by Kaiji Motegi, Faculty of Political Science and Economics,
%       Waseda University.
% June 22, 2014.
%-------------------------------------------------------------------------------

% # of variables
N = size(Cmat, 2);

% Dimension checks. With N = 1 every standardization below is 0/0, and a
% zvec of the wrong length would be indexed with positions derived from Cmat.
if N < 2
    error('Cmat must have at least 2 columns (N >= 2 variables).');
elseif size(Cmat, 1) ~= N + 1
    error('Cmat must be (N+1) x N.');
elseif ~isvector(zvec) || numel(zvec) ~= N + 1
    error('zvec must be a vector with N+1 elements.');
end

% Error checking on zvec
% (issorted accepts repeated values, so ties in zvec are not rejected here.)
if zvec(1) ~= 0
    error('zvec(1) must be 0.');
elseif zvec(end) ~= 1
    error('zvec(end) must be 1.');
elseif ~issorted(zvec)
    error('zvec must be an ascending vector.');
end

% # of clusters at each level: one cluster at the first two levels, then one
% more cluster per level (exactly one merge per step).
x = [1; (1:N)'];

% Error checking on Cmat
if any(Cmat(1,:) ~= [zeros(1, N-1), 1])
    error('Cmat(1,:) must be [zeros(1, N-1), 1].');
elseif any(Cmat(end,:) ~= [N, zeros(1, N-1)])
    error('Cmat(end,:) must be [N, zeros(1, N-1)]');
elseif any(Cmat * (1:N)' ~= N)
    error('inner product of each row of Cmat and [1; 2; ...; N] must equal N.');
elseif any(sum(Cmat, 2) ~= x)
    error('row i of Cmat must contain max(i-1, 1) clusters (one merge per level).');
end

% standardized # of clusters at each level (0 at the root, 1 at the leaves)
p = (x - 1) / (N - 1);

% cluster size based on max and on power mean
S_max   = zeros(N+1, 1);
S_power = zeros(N+1, 1);
for i = 1:(N+1)
    sizes   = find(Cmat(i,:));     % cluster sizes present at this level
    counts  = Cmat(i, sizes);      % how many clusters have each of those sizes
    largest = sizes(end);          % find returns ascending indices
    S_max(i) = largest;
    % Power mean with exponent x(i), factored as
    %   largest * (sum(counts .* (sizes/largest).^x) / x)^(1/x)
    % so that every base is <= 1. Raising the raw sizes 1:N to the power x(i)
    % overflows to Inf for large N, and 0 * Inf = NaN would then corrupt the sum.
    S_power(i) = largest * ...
        (sum(counts .* (sizes / largest).^x(i)) / x(i))^(1 / x(i));
end

% cluster size based on arithmetic mean
S_arith = N ./ x;

% standardized cluster size (1 at the root, 0 at the leaves), one column per
% criterion in the order: max, power mean, arithmetic mean
standardize = @(S) (S - S(end)) / (S(1) - S(end));
Q = [standardize(S_max), standardize(S_power), standardize(S_arith)];

% p and q have a trade-off, so fuzzy decision is employed: at each level take
% min(p, q) and choose the level where that minimum is largest. When several
% levels attain the maximum, the last one is used.
optimal_level = zeros(3, 1);
for k = 1:3
    decision = min(p, Q(:, k));
    best = find(decision == max(decision), 1, 'last');
    optimal_level(k) = zvec(best);
end