作者celestialgod (攸蓝)
看板MATLAB
标题[问题] MEX加速 with Eigen
时间Sun Dec 22 00:08:12 2013
我想要利用matlab MEX file来加速
我遇到了一个问题是
MEX的速度应该会比matlab快上几倍
而且Eigen应该会比matlab快上10~30倍
但是我的matlab跑一次是0.1707 sec
我的mex跑一次是 0.667856 sec
为什麽我的mex可以那麽慢,有大大可以指导我一下吗?
附上我完整的code
#include "mex.h"
#include "Eigen/Dense"
#include "Eigen/Core"
#include <iostream>
using namespace Eigen;
using namespace std;
typedef Map<MatrixXd> MexMat;
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
ptrdiff_t n_de = mxGetM(prhs[0]), n_basis = mxGetN(prhs[0]);
ptrdiff_t n_nu = mxGetM(prhs[1]);
ptrdiff_t n_w = mxGetN(prhs[2]) , n_p = mxGetN(prhs[3]);
ptrdiff_t n_min = min(n_de, n_nu);
MexMat diff_SQ_de ( mxGetPr(prhs[0]), n_de, n_basis);
MexMat diff_SQ_nu ( mxGetPr(prhs[1]), n_nu, n_basis);
MexMat width_candidates ( mxGetPr(prhs[2]), 1, n_w);
MexMat panelty_candidates ( mxGetPr(prhs[3]), 1, n_p);
MatrixXd ker_de_tr(n_de, n_basis), ker_nu_tr(n_nu, n_basis);
MatrixXd H_hat(n_basis, n_basis), h_hat(n_basis, n_basis);
MatrixXd ker_de_tr2(n_min, n_basis), ker_nu_tr2(n_min, n_basis);
MatrixXd B(n_basis, n_basis), Beta(n_basis, 1), B_de(n_basis, n_min);
MatrixXd tmp(1, n_min), B0(n_basis, n_min), B1(n_basis, n_min);
MatrixXd A(n_basis, n_min), r_de(n_min,1), r_nu(n_min,1);
MatrixXd CV_score (n_w, n_p), width(1,1), panelty(1,1);
for (int width_run = 0; width_run < n_w; width_run++){
width(0,0) = width_candidates(0, width_run);
ker_de_tr = (diff_SQ_de.array() / (-2) / pow(width(0,0),2)).array().exp();
ker_nu_tr = (diff_SQ_nu.array() / (-2) / pow(width(0,0),2)).array().exp();
H_hat = ker_de_tr.transpose() * ker_de_tr / n_de;
h_hat = ((ker_nu_tr / n_nu).colwise().sum()).transpose();
ker_de_tr2 = (ker_de_tr.block(0,0,n_min,n_basis)).transpose();
ker_nu_tr2 = (ker_nu_tr.block(0,0,n_min,n_basis)).transpose();
for (int panelty_run = 0; panelty_run < n_p; panelty_run++){
panelty(0,0) = panelty_candidates(0, panelty_run);
H_hat.diagonal() = (H_hat.diagonal()).array() + panelty(0,0)*(n_de-1)/n_de;
B = H_hat.inverse();
Beta = B * h_hat;
B_de = B * ker_de_tr2;
tmp = ((ker_de_tr2.cwiseProduct(B_de)).colwise().sum()).array() * (-1) + n_de;
B0 = (Beta.replicate(1, n_min) + (((Beta.transpose() * ker_de_tr2)
.cwiseQuotient(tmp)).replicate(n_basis, 1).cwiseProduct(B_de))) * n_nu;
B1 = B*ker_nu_tr2 + B_de.cwiseProduct(((((ker_nu_tr2.cwiseProduct(
B_de)).colwise().sum()).cwiseQuotient(tmp)).replicate(n_basis,1)));
A = ((B0-B1) * (n_de-1)/(n_de*(n_nu-1))).cwiseMax(MatrixXd::Zero(n_basis
,n_min));
r_de = (ker_de_tr2.cwiseProduct(A)).colwise().sum();
r_nu = (ker_nu_tr2.cwiseProduct(A)).colwise().sum();
CV_score(width_run, panelty_run) = (r_de.array().pow(2)/n_de).sum()/2 -
(r_nu / n_nu).sum();
}
}
int loc_w, loc_p;
MatrixXd min_cv_score;
CV_score.minCoeff(&loc_p, &loc_w);
MatrixXd O1(1,1), O2(1,1);
O1(0,0) = width_candidates(0,loc_w);
O2(0,0) = panelty_candidates(0,loc_p);
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
MexMat Output1 ( mxGetPr(plhs[0]), 1, 1 );
MexMat Output2 ( mxGetPr(plhs[1]), 1, 1 );
Output1 = O1;
Output2 = O2;
}
我在猜想是不是每次取代这麽多东西造成记忆体不断再复制,而拖慢
希望有人可以帮我找到我哪里写得不好导致matlab比较快。
最後补上我的系统资讯
file (include test.m):
http://myweb.ncku.edu.tw/~r26014014/uLSIF.rar
Platform OS: windows 7 SP1 64bit
Matlab version: 2013b
compiler: VS 2012
compile command:
mex -v -largeArrayDims -IC:\Eigen mex_file.cpp COMPFLAGS="/Ox $COMPFLAGS"
另外,我有google到这篇:
http://tinyurl.com/mzhajo6
最後,如果解决问题需要我全部的程式码,可以寄站内信。
--
※ 发信站: 批踢踢实业坊(ptt.cc)
◆ From: 140.116.152.221
1F:→ forloricever:optimization flag 传给 compiler 了吗? 12/22 04:45
我google之後,加上/Ox之後,还是要0.6678 sec
2F:推 forloricever:看起来像在做 least squares, K1 不用做出来吧 12/22 04:47
我的case更加复杂,K1还会用在其他地方....
此外这里有L2 panelty term
4F:推 forloricever:L2 penalty term 也不用乘出来 12/23 08:07
5F:→ forloricever:大概是 K2 下补 0, K1 下加上diagonal sqrt penalty 12/23 08:13
大大可以给我一点实际例子吗? 因为我run出来不是LS...
我自己去run了一个test
A is 3 by 3 matrix. b is 3 by 1 matrix
A.colPivHouseholderQr.solve(b) is equal to A.lu().solve(b)
not the inv(A'A)*b
test code:
#include <iostream>
#include <Eigen/Dense>
#include <Eigen/Core>
using namespace Eigen;
using namespace std;
typedef Map<MatrixXd> MexMat;
int main()
{
double mat[9] = {-1.0,-2.0,-3.0,4.0, 5.5, 6.5, -7.7, -8.0, 11.0};
double mat2[3] = {1.0,1.0,3.0};
MexMat EiMat (mat, 3, 3);
MexMat EiMat2 (mat2, 3, 1);
cout << EiMat.colPivHouseholderQr().solve(EiMat2) << endl;
cout << EiMat.lu().solve(EiMat2) << endl;
}
Result:
1.11111
0.765432
0.123457
1.11111
0.765432
0.123457
※ 编辑: celestialgod 来自: 36.238.92.7 (12/24 01:59)
6F:推 forloricever:不是 inv(A'A)A'b ? 12/24 11:47