function Rsq_mat = MIDAS_Rsq(x_H_mat, x_L_mat, adjflag, normflag)
% PURPOSE: Compute a similarity matrix containing (adjusted) R^2. Both mixed
%          data sampling (MIDAS) regression and usual single-frequency
%          regression can be used.
%--------------------------------------------------------------------------
% USAGE: Rsq_mat = MIDAS_Rsq(x_H_mat, x_L_mat, adjflag)
%        Rsq_mat = MIDAS_Rsq(x_H_mat, x_L_mat, adjflag, normflag)
% where: x_H_mat = high frequency variables (T_L x m x K_H)
%                  x_H_mat(t,1,k) is the oldest and x_H_mat(t,m,k) is the
%                  newest observation of variable k given low frequency
%                  time period t.
%                  x_H_mat can be [] (single-frequency case)
%                  T_L = low frequency sample size (scalar)
%                  m   = ratio of sampling frequencies (scalar)
%                  K_H = # of high frequency variables (scalar)
%        x_L_mat = low frequency variables (T_L x K_L)
%                  x_L_mat(:,k) is the k-th low frequency variable.
%                  x_L_mat must not be [].
%                  K_L = # of low frequency variables (scalar)
%        adjflag = 1 for adjusted R^2
%                = 0 for R^2
%        normflag = 1 if you want to standardize adjusted R^2 between [0,1]
%                 = 0 otherwise (default; also used when normflag is [])
%--------------------------------------------------------------------------
% RETURNS: Rsq_mat = similarity matrix containing (adjusted) R^2
%                    (K_star x K_star)
%                    K_star = K_H + K_L.
%                    Rsq_mat(i,j) has a similarity value between variables
%                    i and j, where high frequency variables occupy the
%                    K_H x K_H upper-left block and low frequency variables
%                    occupy the K_L x K_L lower-right block.
%--------------------------------------------------------------------------
% ERRORS: raised when fewer than three arguments are given, when x_L_mat is
%         empty, or when x_H_mat is non-empty but its number of rows
%         differs from that of x_L_mat.
%--------------------------------------------------------------------------
% REFERENCE:
%   Kaiji Motegi (2014). Fuzzy Cluster Analysis with Mixed Frequency Data.
%   Working Paper at Waseda University.
% -------------------------------------------------------------------------
% Model for a pair of high frequency variable and low frequency variable:
%    x_L(t) = alpha + beta(1)*x_H(t,1) + ... + beta(m)*x_H(t,m) + u_L(t)
% Model for a pair of high frequency variables:
%    standard single-frequency (but high frequency) regression.
% Model for a pair of low frequency variables:
%    standard single-frequency (low frequency) regression.
% -------------------------------------------------------------------------
% Written by Kaiji Motegi, UNC Chapel Hill.
% July 4, 2014.
% -------------------------------------------------------------------------

    % ---- argument handling ----------------------------------------------
    if nargin < 3
        error('MIDAS_Rsq requires x_H_mat, x_L_mat, and adjflag');
    end
    if nargin < 4 || isempty(normflag)
        normflag = 0;   % default: leave negative adjusted R^2 alone
    end

    [T_L, K_L] = size(x_L_mat);
    if K_L == 0
        error('x_L_mat cannot be empty although x_H_mat can (single frequency)');
    end
    cons = ones(T_L, 1);   % constant regressor on the low frequency basis

    if isempty(x_H_mat)
        % ---- single-frequency case: Rsq_mat is K_L x K_L -----------------
        K_star = K_L;
        Rsq_mat = eye(K_L);
        for i = 2:K_L                      % fill the strictly lower part
            for j = 1:(i-1)
                % regress i-th LF variable onto j-th LF variable
                y = x_L_mat(:, i);
                X = [cons, x_L_mat(:, j)];
                Rsq_mat(i,j) = R_squared(y, X, adjflag);
            end
        end
        % copy lower part to upper part (diagonal was counted twice)
        Rsq_mat = Rsq_mat + Rsq_mat' - eye(K_L);
    else
        % ---- mixed frequency case ---------------------------------------
        [T_H_rows, m, K_H] = size(x_H_mat);
        if T_H_rows ~= T_L
            % fail early instead of a cryptic concatenation error below
            error('x_H_mat and x_L_mat must have the same number of rows (T_L)');
        end
        K_star = K_H + K_L;
        T = m * T_L;                       % high frequency sample size
        CONS = ones(T, 1);                 % constant on the HF basis
        Rsq_mat = eye(K_star);

        % high frequency variable vs. another high frequency variable
        for i = 2:K_H
            for j = 1:(i-1)
                % regress i-th HF variable onto j-th HF variable on the
                % high frequency basis; transposing before reshape stacks
                % the m observations of period t consecutively.
                y = reshape(x_H_mat(:,:,i)', T, 1);
                X = [CONS, reshape(x_H_mat(:,:,j)', T, 1)];
                Rsq_mat(i,j) = R_squared(y, X, adjflag);
            end
        end

        % low frequency variable vs. another low frequency variable
        for i = (K_H + 2):K_star
            for j = (K_H + 1):(i-1)
                % regress i-th LF variable onto j-th LF variable
                y = x_L_mat(:, i - K_H);
                X = [cons, x_L_mat(:, j - K_H)];
                Rsq_mat(i,j) = R_squared(y, X, adjflag);
            end
        end

        % low frequency variable vs. high frequency variable: MIDAS regression
        for i = (K_H + 1):K_star
            for j = 1:K_H
                % regress i-th LF variable onto all m high frequency
                % observations of the j-th HF variable
                y = x_L_mat(:, i - K_H);
                X = [cons, x_H_mat(:,:,j)];
                Rsq_mat(i,j) = R_squared(y, X, adjflag);
            end
        end

        % copy lower part to upper part (diagonal was counted twice)
        Rsq_mat = Rsq_mat + Rsq_mat' - eye(K_star);
    end

    % ---- optional normalization of adjusted R^2 --------------------------
    % Adjusted R^2 may go below 0 if model fitting is very poor. When
    % requested, force the minimum value to be 0 while keeping the maximum
    % value the same.
    if (K_star > 1) && (adjflag == 1) && (normflag == 1)
        diag_mask = logical(eye(K_star));
        pair_vals = Rsq_mat(tril(true(K_star), -1));   % strictly lower part
        Rsq_max = max(pair_vals);
        Rsq_min = min(pair_vals);

        % if the minimum is nonnegative, then do nothing
        if Rsq_min < 0
            if Rsq_max <= 0
                % No pair has a positive adjusted R^2 (this includes the
                % case max == min, where the rescaling below would divide
                % by zero and produce NaN). Every defined off-diagonal
                % entry is mapped to 0 so the result stays within [0,1].
                Rsq_mat(~diag_mask & ~isnan(Rsq_mat)) = 0;
            else
                % min will be normalized to 0, while max will be the same
                Rsq_mat = (Rsq_mat - Rsq_min) / (Rsq_max - Rsq_min) * Rsq_max;
                % diagonal elements should always be one
                Rsq_mat(diag_mask) = 1;
            end
        end
    end
end

function Rsq = R_squared(y, X, adjflag)
% R_SQUARED  (Adjusted) R^2 of the OLS regression of y on X.
%   y       = dependent variable (column vector)
%   X       = regressor matrix; assumed to contain a constant term
%   adjflag = 1 for adjusted R^2, anything else for plain R^2
    beta = X \ y;                          % OLS estimator
    if adjflag == 1
        % adjusted R^2: 1 - [RSS/(n-k)] / [TSS/(n-1)]
        resid = y - X * beta;
        num = resid' * resid / (size(y,1) - size(X,2));
        den = var(y);                      % sample variance, (n-1) divisor
        Rsq = 1 - num / den;
    else
        % R^2 = ESS / TSS
        y_bar = mean(y);
        y_hat = X * beta;
        ESS = (y_hat - y_bar)' * (y_hat - y_bar);   % explained sum of squares
        TSS = (y - y_bar)' * (y - y_bar);           % total sum of squares
        Rsq = ESS / TSS;
    end
end