OpenCV在iOS和x86平台上的性能测试

好久没更新博客了,最近都在忙找工作,目前来看形势还不错。感觉我基本已经跟computer vision没什么关系了,面了很多公司,只有一个是视觉的,其它都是纯码农活儿了。

这学期上了个计算机架构的水课,最后的大作业我就benchmark了一下OpenCV在x86和ARM下面的性能,我的笔记本的CPU是Intel Core i7 620M,iOS测试我用的是iPod Touch,和iPhone 4是一样的CPU, Apple A4。最新的iPhone 4s和iPad2上的Apple A5没机会测,因为这俩都不能完全越狱。x86比ARM快那是肯定的,不过这个benchmark可以看出来到底差多少。

编译:在x86上我用的是64位编译;在ARM上,因为Apple A4同时支持ARM v6和v7两套指令集,我就分别编译了两个版本的静态库。

测试:我使用了不同的数据类型,8/16/32位整型,32/64位浮点;不同的输入矩阵大小,4*4/8*8/…/256*256/512*512;不同的操作,加、乘、转置、求逆、SVD,还有一组图像处理的比较。

下面是一些比较结果:

这门课一直有个旁听的哥们儿,present完了下课后他跑过来跟我聊天,原来他是苹果高级硬件工程师,Accelerate Framework组,就是专门搞这些底层优化的,我提到的很多细节他能接着说好多,他说ARM如果优化的好的话,不会像我benchmark的结果那么不济的,可能会比x86差个一两倍的样子。关公面前耍了次大刀。

下面是原始输出数据:

/*****************************************************/
/* (Time: μs)                                        */
/* Platform:		x86					ARM v6				ARM v7     */
/*****************************************************/
Run bench on type CV_8UC1

Size 512:
	cvAdd: 				417.0				11977.0				5991.5
	cvMul:				417.5				9798.0				6805.5
	cvTranspose: 	214.0				3578.0		 		3322.0
Size 256:
	cvAdd: 				85.25				1431.0				619.5
	cvMul:				119.5				1432.75				786.0
	cvTranspose: 	61.25				528.5					412.25
Size 128:
	cvAdd: 				19.625			346.125				145.375
	cvMul:				33.125			403.375				200.125
	cvTranspose: 	10.125			95.875				65.25
Size 64:
	cvAdd: 				4.9375			89.4375				39.8125
	cvMul:				9.6875			94.125				53.8125
	cvTranspose: 	2.6875			28.5					20.25
Size 32:
	cvAdd: 				2.3125			28.125				15.90625
	cvMul:				2.625				29.90625			18.90625
	cvTranspose: 	1.0		 			10.71875		 	8.40625
Size 16:
	cvAdd: 				0.812500		12.984375			10.25
	cvMul:				0.859375		12.90625			10.125
	cvTranspose: 	0.390625		6.46875		 		5.8125
Size 8:
	cvAdd: 				0.390625		9.03125				8.601562
	cvMul:				0.421875		8.898438			10.5625
	cvTranspose: 	0.203125		5.234375		 	4.9375
Size 4:
	cvAdd: 				0.324219		8.109375			12.742188
	cvMul:				0.300781		11.875				7.351562
	cvTranspose: 	0.167969		7.757812		 	4.917969

Run bench on type CV_16UC1

Size 512:
	cvAdd: 				411.0				5764.0		 		4730.0
	cvMul:				317.5				7719.5				6369.5
	cvTranspose: 	265.0				9700.5		 		9623.5
Size 256:
	cvAdd: 				65.0			 	1084.75				2062.0
	cvMul:				81.75				1634.5				2225.5
	cvTranspose: 	41.75				821.75		 		590.0
Size 128:
	cvAdd: 				29.25				246.5					184.625
	cvMul:				29.0				406.875				260.125
	cvTranspose: 	18.75				138.0					77.5
Size 64:
	cvAdd: 				7.4375			61.375		 		44.75
	cvMul:				7.5					95.375				68.0
	cvTranspose: 	4.8125			36.5		 			21.6875
Size 32:
	cvAdd: 				2.125			 	30.375		 		17.0625
	cvMul:				2.125				34.34375			22.3125
	cvTranspose: 	1.34375			18.125		 		9.15625
Size 16:
	cvAdd: 				0.765625		11.5625		 		15.5
	cvMul:				0.75				20.90625			11.0625
	cvTranspose: 	0.468750		8.125		 			5.9375
Size 8:
	cvAdd: 				0.429688		14.375		 		8.734375
	cvMul:				0.406250		10.734375			8.312500
	cvTranspose: 	0.257812		8.109375		 	5.164062
Size 4:
	cvAdd: 				0.328125		10.320312			8.316406
	cvMul:				0.316406		11.355469			7.527344
	cvTranspose: 	0.191406		7.027344		 	4.835938

Run bench on type CV_32SC1

Size 512:
	cvAdd: 				967.0			 	6999.0 				6702.0
	cvMul:				356.5				8320.0				6334.5
	cvTranspose: 	362.0				10754.0		 		10374.0
Size 256:
	cvAdd: 				120.25			1857.0		 		1805.5
	cvMul:				63.5				1899.75				2062.25
	cvTranspose: 	69.25			 	946.0		 			928.5
Size 128:
	cvAdd: 				33.0			 	217.125				179.25
	cvMul:				15.875			264.75				194.625
	cvTranspose: 	12.5			 	144.0		 			121.375
Size 64:
	cvAdd: 				8.1875			44.5		 			42.8125
	cvMul:				4.25				68.5					50.125
	cvTranspose: 	3.5625			28.625		 		22.0625
Size 32:
	cvAdd: 				2.5625			14.75		 			14.593750
	cvMul:				1.3125			22.375				17.0
	cvTranspose: 	0.90625			10.8125		 		9.125
Size 16:
	cvAdd: 				0.96875			9.5		 				9.703125
	cvMul:				0.4375			11.078125			10.046875
	cvTranspose: 	0.25		 		6.5625		 		5.765625
Size 8:
	cvAdd: 				0.445312		15.570312		 	8.734375
	cvMul:				0.265625		11.132812			7.828125
	cvTranspose: 	0.148438		5.359375		 	5.179688
Size 4:
	cvAdd: 				0.273438		7.882812		 	9.667969
	cvMul:				0.257812		7.757812			7.523438
	cvTranspose: 	0.125000		4.941406		 	7.773438

Run bench on type CV_32FC1

Size 512:
	cvAdd: 				593.5				21375.5				6131.0
	cvMul:				350.5				19938.5				5418.0
	cvTranspose: 	332.0		 		10824.0				10323.0
	cvInvert:		 	273243.5		25071806.0		9195260.0
	cvSVD: 				423692.0		174561882.0		34340097.5
Size 256:
	cvAdd: 				116.5				5301.75		 		1176.0
	cvMul:				53.75				5172.25				851.75
	cvTranspose: 	53.25		 		1020.25		 		841.5
	cvInvert:			29972.75		2256949.75		813636.0
	cvSVD: 				55176.75		20055794.5		3947355.5
Size 128:
	cvAdd: 				44.0				1046.625			219.5
	cvMul:				18.75				1065.375			167.375
	cvTranspose: 	18.375			216.375		 		121.0
	cvInvert: 		5215.25			259487.25			96067.25
	cvSVD: 				9909.75			2546950.25		494365.125
Size 64:
	cvAdd: 				8.75				261.0625			56.0625
	cvMul:				4.5					248.6875			48.875
	cvTranspose: 	2.9375			28.75		 			22.3125
	cvInvert: 		502.3125		31718.375			12318.125
	cvSVD: 				1814.1875		311974.5			65447.9375
Size 32:
	cvAdd: 				3.0625			69.312500			19.34375
	cvMul:				1.375				151.718750		17.15625
	cvTranspose: 	1.03125			17.031250		 	9.21875
	cvInvert: 		76.3125			8175.531250		1585.75
	cvSVD: 				605.75			45699.031250	8469.8125
Size 16:
	cvAdd: 				5.953125		22.875		 		10.781250
	cvMul:				0.578125		22.796875			9.828125
	cvTranspose: 	4.203125		6.453125			5.796875
	cvInvert: 		14.453125		530.921875		222.046875
	cvSVD: 				57.265625		4529.875			1129.640625
Size 8:
	cvAdd: 				0.414062		11.765625		 	8.992188
	cvMul:				0.351562		11.375				7.859375
	cvTranspose: 	0.250000		5.359375		 	5.046875
	cvInvert: 		1.609375		78.390625			39.562500
	cvSVD: 				11.835938		729.984375		224.789062
Size 4:
	cvAdd: 				0.285156		13.300781			8.308594
	cvMul:				0.199219		10.527344			8.335938
	cvTranspose: 	0.183594		5.429688		 	7.472656
	cvInvert:		 	0.460938		21.921875			13.855469
	cvSVD: 				2.289062		111.308594		52.316406

Run bench on type CV_64FC1

Size 512:
	cvAdd: 				1389.5			30775.0				9891.0
	cvMul:				991.5				29783.5				8469.0
	cvTranspose: 	1137.5			16028.0				15558.5
	cvInvert: 		290684.0		38524160.0		13876679.5
	cvSVD: 				1058105.5		349269399.5		139599952.0
Size 256:
	cvAdd: 				84.0				7091.75				2188.0
	cvMul:				81.25				7265.25				1922.25
	cvTranspose: 	120.75			3243.25				3414.0
	cvInvert: 		34608.25		3467022.5			1393893.75
	cvSVD: 				122659.5		38763803.75		15673162.25
Size 128:
	cvAdd: 				18.875			1391.0				405.875
	cvMul:				19.125			1318.0				505.25
	cvTranspose: 	19.5				319.625				281.125
	cvInvert: 		3644.875		369943.25			157781.625
	cvSVD: 				15826.75		4468381.25		1725834.125
Size 64:
	cvAdd: 				5.125000		337.4375			102.875
	cvMul:				3.937500		333.3125			109.375
	cvTranspose: 	7.062500		58.8125				51.1875
	cvInvert: 		358.4375		44394.625			19697.0625
	cvSVD: 				2203.3125		530734.875		206134.5
Size 32:
	cvAdd: 				1.562500		86.062500			30.125
	cvMul:				1.500000		87.187500			32.34375
	cvTranspose: 	1.781250		18.093750			15.28125
	cvInvert: 		82.6875			5553.281250		2522.625
	cvSVD: 				350.5				64459.53125		25085.78125
Size 16:
	cvAdd: 				0.343750		26.921875			13.546875
	cvMul:				0.328125		27.468750			13.468750
	cvTranspose: 	0.390625		8.234375			7.625
	cvInvert: 		9.781250		716.296875		344.0
	cvSVD: 				72.328125		7913.484375		3133.546875
Size 8:
	cvAdd: 				0.335938		12.734375			9.484375
	cvMul:				0.328125		12.632812			8.687500
	cvTranspose: 	0.335938		5.773438			5.484375
	cvInvert: 		2.375000		102.585938	 	50.281250
	cvSVD: 				14.054688		973.703125		400.695312
Size 4:
	cvAdd: 				0.207031		9.218750			9.550781
	cvMul:				0.160156		8.968750			10.441406
	cvTranspose: 	0.109375 		5.082031			5.308594
	cvInvert: 		0.382812		21.628906	 		15.808594
	cvSVD: 				2.500000		137.589844		68.695312

Run bench on lena

Size 512:
	cvSmooth: 		423.5				34196.5		 		11714.0
	cvSobel: 			649.0		 		67786.5		 		13859.0
	cvDCT: 				5845.0		 	599860.5 			105444.5
	cvDFT: 				3565.5			458634.5			87149.5
Size 256:
	cvSmooth: 		171.0				6350.0		 		2987.0
	cvSobel: 			129.75			16870.0		 		2867.0
	cvDCT: 				1201.75			131520.5			18459.5
	cvDFT: 				765.0		 		98563.75			14267.0
Size 128:
	cvSmooth: 		35.25		 		1783.25				1066.0
	cvSobel: 			27.875		 	4449.375	 		826.875
	cvDCT: 				244.75		 	28718.375			3922.125
	cvDFT: 				171.5		 		21448.125			2890.0
Size 64:
	cvSmooth: 		14.4375		 	587.75		 		383.875
	cvSobel: 			12.5				1662.1875			373.25
	cvDCT: 				58.1875		 	6595.625			905.375
	cvDFT: 				38.5		 		4662.75				643.5
Size 32:
	cvSmooth: 		8.1875		 	267.25		 		211.9375
	cvSobel: 			8.125		 		450.65625			189.875
	cvDCT: 				14.71875		1478.0			 	218.5625
	cvDFT: 				10.3125		 	1032.531250		203.0
Size 16:
	cvSmooth:		 	5.953125		162.6875			145.453125
	cvSobel: 			6.125		 		222.25				144.468750
	cvDCT: 				4.46875			358.625		 		76.734375
	cvDFT: 				3.078125		242.875				61.078125
Size 8:
	cvSmooth: 		5.203125		135.0625			116.539062
	cvSobel: 			4.898438		150.015625		123.187500
	cvDCT: 				1.46875			85.531250			28.054688
	cvDFT: 				1.25				60.304688			29.742188
Size 4:
	cvSmooth: 		4.660156		123.445312		103.402344
	cvSobel: 			4.4375		 	129.628906		110.570312

benchmark的测试代码:

#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <cxcore.h>
#include <highgui.h>
#include <cv.h>

float time_diff(struct timeval start, struct timeval end, int rounds);
void run_bench_math_type(int, int);
void run_bench_image(int);

/*
 * Benchmark driver: runs the image benchmark first, then the matrix
 * benchmark once per element type, printing a banner before each run.
 */
int main(){

	/* Element types handed to run_bench_math_type(), in execution order,
	 * each paired with the banner printed before its run. */
	static const struct {
		int elem_type;
		const char *banner;
	} math_runs[] = {
		{ CV_8UC1,  "\n\nRun bench on type CV_8UC1\n"  },
		{ CV_16UC1, "\n\nRun bench on type CV_16UC1\n" },
		{ CV_32SC1, "\n\nRun bench on type CV_32SC1\n" },
		{ CV_32FC1, "\n\nRun bench on type CV_32FC1\n" },
		{ CV_64FC1, "\n\nRun bench on type CV_64FC1\n" },
	};
	const int run_count = (int)(sizeof math_runs / sizeof math_runs[0]);

	/* Round count passed to every benchmark function. */
	const int round_base = 2;

	/* Fixed seed: guarantees the random number sequence is the same on every run. */
	srandom(0);

	printf("Time is in micro-seconds.\n");

	printf("\n\nRun bench on lena\n");
	run_bench_image(round_base);

	for (int i = 0; i < run_count; i++){
		printf("%s", math_runs[i].banner);
		run_bench_math_type(math_runs[i].elem_type, round_base);
	}

	return 0;
}

/*
 * Time cvAdd, cvMul and cvTranspose on square matrices of the given element
 * type; for CV_32FC1 and CV_64FC1 also time cvInvert and cvSVD.
 *
 * elem_type  OpenCV matrix type constant used for all three matrices.
 * roundbase  repetition count used for the largest matrix.
 *
 * The matrix side starts at 512 and is halved down to 4, while the repetition
 * count doubles at every step. For each operation the value returned by
 * time_diff() for the timed loop is printed.
 */
void run_bench_math_type(int elem_type, int roundbase){

	const int smallest = 4;
	const int largest = 512;
	const int is_float = (elem_type == CV_32FC1 || elem_type == CV_64FC1);

	struct timeval begin, finish;
	int reps = roundbase;
	int side = largest;

	while (side >= smallest){
		printf("Size %d with %d rounds average:\n", side, reps);

		CvMat *lhs = cvCreateMat(side, side, elem_type);
		CvMat *rhs = cvCreateMat(side, side, elem_type);
		CvMat *out = cvCreateMat(side, side, elem_type);

		// Fill both operands with values in [1, 254]; the two random() draws
		// per cell are made in lhs-then-rhs order.
		for (int row = 0; row < side; row++){
			for (int col = 0; col < side; col++){
				cvSet2D(lhs, row, col, cvScalar(1.0 + random()%254, 0, 0, 0));
				cvSet2D(rhs, row, col, cvScalar(1.0 + random()%254, 0, 0, 0));
			}
		}

		// Element-wise addition.
		gettimeofday(&begin, NULL);
		for (int r = 0; r < reps; r++){
			cvAdd(lhs, rhs, out, NULL);
		}
		gettimeofday(&finish, NULL);
		printf("\tcvAdd: \t\t%f\n", time_diff(begin, finish, reps));

		// Element-wise multiplication with scale factor 1.
		gettimeofday(&begin, NULL);
		for (int r = 0; r < reps; r++){
			cvMul(lhs, rhs, out, 1);
		}
		gettimeofday(&finish, NULL);
		printf("\tcvMul:\t\t%f\n", time_diff(begin, finish, reps));

		// Transposition.
		gettimeofday(&begin, NULL);
		for (int r = 0; r < reps; r++){
			cvTranspose(lhs, out);
		}
		gettimeofday(&finish, NULL);
		printf("\tcvTranspose: \t%f\n", time_diff(begin, finish, reps));

		// Inversion and SVD are only benchmarked for the floating-point types.
		if (is_float){
			gettimeofday(&begin, NULL);
			for (int r = 0; r < reps; r++){
				cvInvert(lhs, out, CV_LU);
			}
			gettimeofday(&finish, NULL);
			printf("\tcvInvert: \t%f\n", time_diff(begin, finish, reps));

			// NOTE(review): the flag value 1 looks like CV_SVD_MODIFY_A; if so,
			// lhs may be altered by the first round and later rounds would
			// decompose different data - confirm against the OpenCV headers.
			gettimeofday(&begin, NULL);
			for (int r = 0; r < reps; r++){
				cvSVD(lhs, out, NULL, NULL, 1);
			}
			gettimeofday(&finish, NULL);
			printf("\tcvSVD: \t\t%f\n", time_diff(begin, finish, reps));
		}

		cvReleaseMat(&lhs);
		cvReleaseMat(&rhs);
		cvReleaseMat(&out);

		reps *= 2;
		side /= 2;
	}
}

/*
 * Time cvSmooth, cvSobel, cvDCT and cvDFT on scaled copies of "lena.bmp".
 *
 * roundbase  repetition count used for the largest image.
 *
 * The image is loaded as grayscale from the current working directory and a
 * 32-bit float copy of it is made. Both are resized to n x n for n going from
 * 512 down to 4 (halving each step) while the repetition count doubles.
 * cvSmooth runs on the copy that keeps the loaded depth; cvSobel, cvDCT and
 * cvDFT run on the float copy. For each operation the value returned by
 * time_diff() for the timed loop is printed.
 *
 * If the image cannot be loaded, a message is written to stderr and the
 * function returns without running anything.
 */
void run_bench_image(int roundbase){

	int nrounds = roundbase;
	const int min_size = 4;
	const int max_size = 512;
	struct timeval t0, t1;

	IplImage *source_imgi = cvLoadImage("lena.bmp", CV_LOAD_IMAGE_GRAYSCALE);
	if (source_imgi == NULL){
		// Bail out before source_imgi is dereferenced below.
		fprintf(stderr, "run_bench_image: cannot load lena.bmp\n");
		return;
	}

	// The benchmark expects a 512x512 source; check before using the dimensions.
	assert( source_imgi->width==512 && source_imgi->height == 512);

	IplImage *source_imgf = cvCreateImage(cvSize(source_imgi->width, source_imgi->height), IPL_DEPTH_32F, source_imgi->nChannels);
	cvConvert(source_imgi, source_imgf);

	IplImage *imgAi, *imgBi;
	IplImage *imgAf, *imgBf;

	for(int n = max_size; n >= min_size; nrounds*=2, n /= 2){
		printf("Size %d with %d rounds average:\n", n, nrounds);

		// A* are the resized inputs, B* the outputs; i = loaded depth, f = 32-bit float.
		imgAi = cvCreateImage(cvSize(n, n), source_imgi->depth, source_imgi->nChannels);
		imgAf = cvCreateImage(cvSize(n, n), IPL_DEPTH_32F, source_imgi->nChannels);
		imgBi = cvCreateImage(cvSize(n, n), source_imgi->depth, source_imgi->nChannels);
		imgBf = cvCreateImage(cvSize(n, n), IPL_DEPTH_32F, source_imgi->nChannels);

		cvResize(source_imgi, imgAi, CV_INTER_LINEAR);
		cvResize(source_imgf, imgAf, CV_INTER_LINEAR);

		gettimeofday(&t0, NULL);
		for (int k = 0; k < nrounds; k++){
			cvSmooth(imgAi, imgBi, CV_GAUSSIAN, 3, 0 ,0, 0);
		}
		gettimeofday(&t1, NULL);
		printf("\tcvSmooth: \t%f\n", time_diff(t0, t1, nrounds));

		gettimeofday(&t0, NULL);
		for (int k = 0; k < nrounds; k++){
			cvSobel(imgAf, imgBf, 0, 1, 3);
		}
		gettimeofday(&t1, NULL);
		printf("\tcvSobel: \t%f\n", time_diff(t0, t1, nrounds));

		gettimeofday(&t0, NULL);
		for (int k = 0; k < nrounds; k++){
			cvDCT(imgAf, imgBf, CV_DXT_FORWARD);
		}
		gettimeofday(&t1, NULL);
		printf("\tcvDCT: \t\t%f\n", time_diff(t0, t1, nrounds));

		gettimeofday(&t0, NULL);
		for (int k = 0; k < nrounds; k++){
			cvDFT(imgAf, imgBf, CV_DXT_FORWARD, 0);
		}
		gettimeofday(&t1, NULL);
		printf("\tcvDFT: \t\t%f\n", time_diff(t0, t1, nrounds));

		cvReleaseImage(&imgAi);
		cvReleaseImage(&imgAf);
		cvReleaseImage(&imgBi);
		cvReleaseImage(&imgBf);
	}

	// The source images were previously leaked; release them once the loop is done.
	cvReleaseImage(&source_imgf);
	cvReleaseImage(&source_imgi);
}

/*
 * Average time per round, in microseconds, between two gettimeofday() samples.
 *
 * start, end  timestamps taken before and after the timed loop.
 * rounds      number of iterations the interval covers.
 *
 * The interval is accumulated in double precision and narrowed to float on
 * return.
 */
float time_diff(struct timeval start, struct timeval end, int rounds){

	/* Whole-second part of the interval, converted to microseconds. */
	double elapsed_us = (end.tv_sec - start.tv_sec) * 1000000.0;

	/* Add the end fraction first, then remove the start fraction. */
	elapsed_us = elapsed_us + end.tv_usec;
	elapsed_us = elapsed_us - start.tv_usec;

	return elapsed_us / (float)rounds;
}

Tags: , ,

3 Responses to “OpenCV在iOS和x86平台上的性能测试”

  1. [...] 老杨 分类: 新闻 标签: arm, benchmark, ipod touch, opencv, x86 评论 (0) Trackbacks (0) [...]

  2. wangyubin 说道:

    老杨,你的那些数据图表使用什么工具绘制的啊?看起来很漂亮。
    求指导。

Leave a Reply