如何在 Octave 中向量化此函数的循环?
How can I vectorize the loops of this function in Octave?
我希望能够矢量化此函数的 for 循环,然后能够以八度音程对其进行并行化。这些 for 循环可以向量化吗?非常感谢您!
我附上函数的代码,注释每个 for 循环和 if-else 的开始和结束。
function [par]=pem_v(tsm,pr)
% tsm and pr are arrays of N by n. % par is an array of N by 8
tss=[27:0.5:32];
tc=[20:0.01:29];
N=size(tsm,1);
% main-loop
for ii=1:N
% I extract the rows in each loop because each one represents a sample
sst=tsm(ii,:); sst=sst'; %then I convert each sample to column vectors
pre=pr(ii,:); pre=pre';
% main-condition
if isnan(nanmean(sst))==1;
par(ii,1:8)=NaN;
else
% first sub-loop
for k=1:length(tss);
idxx=find(sst>=tss(k)-0.25 & sst<=tss(k)+0.25);
out(k)=prctile(pre(idxx),90);
end
% end first sub-loop
tp90=tss(find(max(out)==out));
% second sub-loop
for j=1:length(tc)
cond1=find(sst>=tc(j) & sst<=tp90);
cond2=find(sst>=tp90);
pem=zeros(length(sst),1);
A=[sst(cond1),ones(length(cond1),1)];
B=regress(pre(cond1),A);
pt90=B(1)*(tp90-tc(j));
AA=[(sst(cond2)-tp90)];
BB=regress(pre(cond2)-pt90,AA);
pem(cond1)=max(0,B(1)*(sst(cond1)-tc(j)));
pem(cond2)=max(0,(BB(1)*(sst(cond2)-tp90))+pt90);
clear A B AA BB;
E(j)=sqrt(nansum((pem-pre).^2)/length(pre));
clear pem;
end
% end second sub-loop
tcc=tc(find(E==min(E)));
% sub-condition
if(isempty(tcc)==1);
par(ii,1:9)=NaN;
else
cond1=find(sst>=tcc & sst<=tp90);
cond2=find(sst>=tp90);
pem=zeros(length(sst),1);
A=[sst(cond1),ones(length(cond1),1)];
B=regress(pre(cond1),A);
pt90=B(1)*(tp90-tcc);
AA=[sst(cond2)-tp90];
BB=regress(pre(cond2)-pt90,AA);
pem(cond1)=max(0,B(1)*(sst(cond1)-tcc));
pem(cond2)=max(0,(BB(1)*(sst(cond2)-tp90))+pt90);
RMSE=sqrt(nansum((pem-pre).^2)/length(pre));
% outputs
par(ii,1)=tcc;
par(ii,2)=tp90;
par(ii,3)=B(1);
par(ii,4)=BB(1);
par(ii,5)=RMSE;
par(ii,6)=nanmean(sst);
par(ii,7)=nanmean(pre);
par(ii,8)=nanmean(pem);
end
% end sub-condition
clear pem pre sst RMSE BB B tp90 tcc
end
% end main-condition
end
% end main-loop
您没有给出任何示例输入,所以我创建了一些类似的输入:
N = 5; n = 800;
tsm = rand(N,n)*5+27; pr = rand(N,n);
那么,在考虑对代码进行矢量化之前,您应该牢记 4 件事...
- 避免每次循环都计算相同的东西(比如向量的大小),而是在循环之前计算
- 尽可能预先分配数组(将它们声明为 zeros/NaNs 等)
- 不要使用
find
将逻辑索引转换为线性索引,没有必要并且it will slow down your code
- 不要重复使用
clear
,尤其是在循环中多次使用。它很慢!相反,使用预分配来确保变量符合您对每个循环的预期。
使用上面的随机输入,并考虑到这 4 件事,下面的代码比你的代码快 ~65%。注意:这甚至没有进行任何矢量化!
function [par]=pem_v(tsm,pr)
% tsm and pr are arrays of N by n.
% par is an array of N by 8
tss=[27:0.5:32];
tc=[20:0.01:29];
N=size(tsm,1);
% Transpose once here instead of every loop
tsm = tsm';
pr = pr';
% Pre-allocate memory for output 'par'
par = NaN(N, 8);
% Don't compute these every loop, do it before the loop.
% numel simpler than length for vectors, and size is clearer still
ntss = numel(tss);
nsst = size(tsm,1);
ntc = numel(tc);
npr = size(pr, 1);
for ii=1:N
% Extract the columns in each loop because each one represents a sample
sst=tsm(:,ii);
pre=pr(:,ii);
% main-condition. Previously isnan(nanmean(sst))==1, but that's only true if all(isnan(sst))
% We don't need to assign par(ii,1:8)=NaN since we initialised par to a matrix of NaNs
if ~all(isnan(sst));
% first sub-loop, initialise 'out' first
out = zeros(1, ntss);
for k=1:ntss;
% Don't use FIND on an indexing vector. Use the logical index raw, it's quicker
idxx = (sst>=tss(k)-0.25 & sst<=tss(k)+0.25);
% We need a check that some values of idxx are true, otherwise prctile will error.
if nnz(idxx) > 0
out(k) = prctile(pre(idxx), 90);
end
end
% Again, no need for FIND, just reduces speed. This is a theme...
tp90=tss(max(out)==out);
for jj=1:ntc
cond1 = (sst>=tc(jj) & sst<=tp90);
cond2 = (sst>=tp90);
% Use nnz (numer of non-zero) instead of length, since cond1 is now a logical vector of all elements
A = [sst(cond1),ones(nnz(cond1),1)];
B = regress(pre(cond1), A);
pt90 = B(1)*(tp90-tc(jj));
AA = [(sst(cond2)-tp90)];
BB = regress(pre(cond2)-pt90,AA);
pem=zeros(nsst,1);
pem(cond1) = max(0, B(1)*(sst(cond1)-tc(jj)));
pem(cond2) = max(0, (BB(1)*(sst(cond2)-tp90))+pt90);
E(jj) = sqrt(nansum((pem-pre).^2)/npr);
end
tcc = tc(E==min(E));
if ~isempty(tcc);
cond1 = (sst>=tcc & sst<=tp90);
cond2 = (sst>=tp90);
A = [sst(cond1),ones(nnz(cond1),1)];
B = regress(pre(cond1),A);
pt90 = B(1)*(tp90-tcc);
AA = [sst(cond2)-tp90];
BB = regress(pre(cond2)-pt90,AA);
pem = zeros(length(sst),1);
pem(cond1) = max(0, B(1)*(sst(cond1)-tcc));
pem(cond2) = max(0, (BB(1)*(sst(cond2)-tp90))+pt90);
RMSE = sqrt(nansum((pem-pre).^2)/npr);
% Outputs, which we might as well assign all at once!
par(ii,:)=[tcc, tp90, B(1), BB(1), RMSE, ...
nanmean(sst), nanmean(pre), nanmean(pem)];
end
end
end
我希望能够矢量化此函数的 for 循环,然后能够以八度音程对其进行并行化。这些 for 循环可以向量化吗?非常感谢您!
我附上函数的代码,注释每个 for 循环和 if-else 的开始和结束。
function [par]=pem_v(tsm,pr)
% tsm and pr are arrays of N by n. % par is an array of N by 8
tss=[27:0.5:32];
tc=[20:0.01:29];
N=size(tsm,1);
% main-loop
for ii=1:N
% I extract the rows in each loop because each one represents a sample
sst=tsm(ii,:); sst=sst'; %then I convert each sample to column vectors
pre=pr(ii,:); pre=pre';
% main-condition
if isnan(nanmean(sst))==1;
par(ii,1:8)=NaN;
else
% first sub-loop
for k=1:length(tss);
idxx=find(sst>=tss(k)-0.25 & sst<=tss(k)+0.25);
out(k)=prctile(pre(idxx),90);
end
% end first sub-loop
tp90=tss(find(max(out)==out));
% second sub-loop
for j=1:length(tc)
cond1=find(sst>=tc(j) & sst<=tp90);
cond2=find(sst>=tp90);
pem=zeros(length(sst),1);
A=[sst(cond1),ones(length(cond1),1)];
B=regress(pre(cond1),A);
pt90=B(1)*(tp90-tc(j));
AA=[(sst(cond2)-tp90)];
BB=regress(pre(cond2)-pt90,AA);
pem(cond1)=max(0,B(1)*(sst(cond1)-tc(j)));
pem(cond2)=max(0,(BB(1)*(sst(cond2)-tp90))+pt90);
clear A B AA BB;
E(j)=sqrt(nansum((pem-pre).^2)/length(pre));
clear pem;
end
% end second sub-loop
tcc=tc(find(E==min(E)));
% sub-condition
if(isempty(tcc)==1);
par(ii,1:9)=NaN;
else
cond1=find(sst>=tcc & sst<=tp90);
cond2=find(sst>=tp90);
pem=zeros(length(sst),1);
A=[sst(cond1),ones(length(cond1),1)];
B=regress(pre(cond1),A);
pt90=B(1)*(tp90-tcc);
AA=[sst(cond2)-tp90];
BB=regress(pre(cond2)-pt90,AA);
pem(cond1)=max(0,B(1)*(sst(cond1)-tcc));
pem(cond2)=max(0,(BB(1)*(sst(cond2)-tp90))+pt90);
RMSE=sqrt(nansum((pem-pre).^2)/length(pre));
% outputs
par(ii,1)=tcc;
par(ii,2)=tp90;
par(ii,3)=B(1);
par(ii,4)=BB(1);
par(ii,5)=RMSE;
par(ii,6)=nanmean(sst);
par(ii,7)=nanmean(pre);
par(ii,8)=nanmean(pem);
end
% end sub-condition
clear pem pre sst RMSE BB B tp90 tcc
end
% end main-condition
end
% end main-loop
您没有给出任何示例输入,所以我创建了一些类似的输入:
N = 5; n = 800;
tsm = rand(N,n)*5+27; pr = rand(N,n);
那么,在考虑对代码进行矢量化之前,您应该牢记 4 件事...
- 避免每次循环都计算相同的东西(比如向量的大小),而是在循环之前计算
- 尽可能预先分配数组(将它们声明为 zeros/NaNs 等)
- 不要使用
find
将逻辑索引转换为线性索引,没有必要并且it will slow down your code - 不要重复使用
clear
,尤其是在循环中多次使用。它很慢!相反,使用预分配来确保变量符合您对每个循环的预期。
使用上面的随机输入,并考虑到这 4 件事,下面的代码比你的代码快 ~65%。注意:这甚至没有进行任何矢量化!
function [par]=pem_v(tsm,pr)
% tsm and pr are arrays of N by n.
% par is an array of N by 8
tss=[27:0.5:32];
tc=[20:0.01:29];
N=size(tsm,1);
% Transpose once here instead of every loop
tsm = tsm';
pr = pr';
% Pre-allocate memory for output 'par'
par = NaN(N, 8);
% Don't compute these every loop, do it before the loop.
% numel simpler than length for vectors, and size is clearer still
ntss = numel(tss);
nsst = size(tsm,1);
ntc = numel(tc);
npr = size(pr, 1);
for ii=1:N
% Extract the columns in each loop because each one represents a sample
sst=tsm(:,ii);
pre=pr(:,ii);
% main-condition. Previously isnan(nanmean(sst))==1, but that's only true if all(isnan(sst))
% We don't need to assign par(ii,1:8)=NaN since we initialised par to a matrix of NaNs
if ~all(isnan(sst));
% first sub-loop, initialise 'out' first
out = zeros(1, ntss);
for k=1:ntss;
% Don't use FIND on an indexing vector. Use the logical index raw, it's quicker
idxx = (sst>=tss(k)-0.25 & sst<=tss(k)+0.25);
% We need a check that some values of idxx are true, otherwise prctile will error.
if nnz(idxx) > 0
out(k) = prctile(pre(idxx), 90);
end
end
% Again, no need for FIND, just reduces speed. This is a theme...
tp90=tss(max(out)==out);
for jj=1:ntc
cond1 = (sst>=tc(jj) & sst<=tp90);
cond2 = (sst>=tp90);
% Use nnz (numer of non-zero) instead of length, since cond1 is now a logical vector of all elements
A = [sst(cond1),ones(nnz(cond1),1)];
B = regress(pre(cond1), A);
pt90 = B(1)*(tp90-tc(jj));
AA = [(sst(cond2)-tp90)];
BB = regress(pre(cond2)-pt90,AA);
pem=zeros(nsst,1);
pem(cond1) = max(0, B(1)*(sst(cond1)-tc(jj)));
pem(cond2) = max(0, (BB(1)*(sst(cond2)-tp90))+pt90);
E(jj) = sqrt(nansum((pem-pre).^2)/npr);
end
tcc = tc(E==min(E));
if ~isempty(tcc);
cond1 = (sst>=tcc & sst<=tp90);
cond2 = (sst>=tp90);
A = [sst(cond1),ones(nnz(cond1),1)];
B = regress(pre(cond1),A);
pt90 = B(1)*(tp90-tcc);
AA = [sst(cond2)-tp90];
BB = regress(pre(cond2)-pt90,AA);
pem = zeros(length(sst),1);
pem(cond1) = max(0, B(1)*(sst(cond1)-tcc));
pem(cond2) = max(0, (BB(1)*(sst(cond2)-tp90))+pt90);
RMSE = sqrt(nansum((pem-pre).^2)/npr);
% Outputs, which we might as well assign all at once!
par(ii,:)=[tcc, tp90, B(1), BB(1), RMSE, ...
nanmean(sst), nanmean(pre), nanmean(pem)];
end
end
end