Publications


"Page Classifier and Placer: A Scheme of Managing Hybrid Caches". Xin Yu, Xuanhua Shi, Hai Jin, Xiaofei Liao, Song Wu and Xiaoming Li. the 11th IFIP International Conference on Network and Parallel Computing (NPC 2014), Yilan, Taiwan. September, 2014. [bib]

					@incollection{yu2014page,
						author    = {Yu, Xin and Shi, Xuanhua and Jin, Hai and Liao, Xiaofei and Wu, Song and Li, Xiaoming},
						title     = {Page Classifier and Placer: A Scheme of Managing Hybrid Caches},
						booktitle = {Network and Parallel Computing},
						editor    = {Hsu, Ching-Hsien and Shi, Xuanhua and Salapura, Valentina},
						series    = {Lecture Notes in Computer Science},
						volume    = {8707},
						pages     = {10--22},
						publisher = {Springer Berlin Heidelberg},
						year      = {2014},
						isbn      = {978-3-662-44916-5},
						doi       = {10.1007/978-3-662-44917-2_2},
						url       = {http://dx.doi.org/10.1007/978-3-662-44917-2_2},
						keywords  = {hybrid cache; page coloring; multi-core},
					}
                

"Input-adaptive Parallel Sparse Fast Fourier Transform for Stream Processing". Shuo Chen and Xiaoming Li. The 28th International Conference on Supercomputing (ICS 2014), Munich, Germany, June, 2014. [bib]

					@inproceedings{chen2014input,
						author    = {Chen, Shuo and Li, Xiaoming},
						title     = {Input-adaptive Parallel Sparse Fast {Fourier} Transform for Stream Processing},
						booktitle = {Proceedings of the 28th ACM International Conference on Supercomputing},
						series    = {ICS '14},
						year      = {2014},
						isbn      = {978-1-4503-2642-1},
						location  = {Munich, Germany},
						pages     = {93--102},
						numpages  = {10},
						url       = {http://doi.acm.org/10.1145/2597652.2597669},
						doi       = {10.1145/2597652.2597669},
						acmid     = {2597669},
						publisher = {ACM},
						address   = {New York, NY, USA},
						keywords  = {input adaptive, parallel algorithm, sparse fft, stream processing},
					}
 				

"ASAFESSS: A Scheduler-Driven Adaptive Framework for Extreme Scale Software Stacks". Tom St. John, Benoit Meister, Andres Marquez, Joseph Manzano, Guang Gao and Xiaoming Li. The 4th International Workshop on Adaptive Self-tuning Computing Systems (ADAPT), Vienna, Austria. 2013 (Best Paper Award). [bib]

					@inproceedings{stjohn14input,
						author    = {St. John, Tom and Meister, Beno{\^\i}t and Marquez, Andres and Manzano, Joseph B. and Gao, Guang R. and Li, Xiaoming},
						title     = {{ASAFESSS}: A Scheduler-driven Adaptive Framework for Extreme Scale Software Stacks},
						booktitle = {Proceedings of International Workshop on Adaptive Self-tuning Computing Systems},
						series    = {ADAPT '14},
						year      = {2014},
						isbn      = {978-1-4503-2514-1},
						location  = {Vienna, Austria},
						pages     = {21:21--21:23},
						articleno = {21},
						numpages  = {3},
						url       = {http://doi.acm.org/10.1145/2553062.2553063},
						doi       = {10.1145/2553062.2553063},
						acmid     = {2553063},
						publisher = {ACM},
						address   = {New York, NY, USA},
						keywords  = {Adaptive Optimization, Data Compression},
					}
                

"A Hybrid GPU/CPU FFT Library for Large FFT Problems". Shuo Chen and Xiaoming Li. Proceedings of The 32nd IEEE International Performance Computing and Communications Conference, IPCCC 2013, San Diego, CA, USA. [bib]

					@inproceedings{chen2013hybrid,
						author    = {Chen, Shuo and Li, Xiaoming},
						title     = {A hybrid {GPU}/{CPU} {FFT} library for large {FFT} problems},
						booktitle = {Performance Computing and Communications Conference (IPCCC), 2013 IEEE 32nd International},
						year      = {2013},
						month     = dec,
						pages     = {1--10},
						keywords  = {fast Fourier transforms;graphics processing units;peripheral interfaces;3D FFT;CPU FFT library;CPU computing power;FFT computation;FFT decomposition paradigms;GPU memory;GPU performance;GeForce GTX480;Intel MKL;PCI channel;Tesla C2070;Tesla C2075;current GPU based FFT implementation;data transfer;empirical tuning process;flexible partitioning;graphic processing units;hybrid GPU;hybrid parallel framework;large FFT problems;large size fast Fourier transform;limited memory size;memory-transfer controller;multicore CPU;optimal load balancing;performance modeling;tailor computation;Graphics processing units},
						doi       = {10.1109/PCCC.2013.6742796},
					}
				

"An Input-Adaptive Algorithm for High Performance Sparse Fast Fourier Transform". Shuo Chen and Xiaoming Li. Proceedings of The 26th International Workshop on Languages and Compilers for Parallel Computing (LCPC 2013), San Jose, CA. 2013.

"Static micro-scheduling: Resource contention relief in multithreaded programs". Yuanfang Chen and Xiaoming Li. Proceedings of the 31st IEEE International Performance Computing and Communications Conference, IPCCC 2012, Austin, TX, USA, December 1-3, 2012. [bib]

					@inproceedings{chen2012static,
						author    = {Chen, Yuanfang and Li, Xiaoming},
						title     = {Static micro-scheduling: Resource contention relief in multithreaded programs},
						booktitle = {Performance Computing and Communications Conference (IPCCC), 2012 IEEE 31st International},
						year      = {2012},
						month     = dec,
						pages     = {187--188},
						keywords  = {multi-threading;resource allocation;scheduling;NAS Parallel Benchmark;computer resource;contention cycle;contention mitigation;contention searching algorithm;low-overhead hardware counter based profiling method;memory contention loop;multithreaded program;multithreading task;parallelism;resource contention;static microscheduling;time-relevant contention location;Benchmark testing;Hardware;Instruction sets;Message systems;Multicore processing;Radiation detectors;USA Councils;compiler;contention;hardware counter;optimization;profiling},
						doi       = {10.1109/PCCC.2012.6407691},
						issn      = {1097-2641},
					}
					

"Source Code Partitioning in Program Optimization". Murat Bolat, Kirk Kelsey, Xiaoming Li and Guang R. Gao. Proceedings of the 17th IEEE International Conference on Parallel and Distributed Systems (ICPADS) 2011. Tainan, Taiwan, December 2011. [bib]

					@inproceedings{bolat2011source,
						author    = {Bolat, Murat and Kelsey, Kirk and Li, Xiaoming and Gao, Guang R.},
						title     = {Source Code Partitioning in Program Optimization},
						booktitle = {Proceedings of the 17th {IEEE} International Conference on Parallel and Distributed Systems ({ICPADS})},
						year      = {2011},
						month     = dec,
						pages     = {56--63},
						issn      = {1521-9097},
						doi       = {10.1109/ICPADS.2011.125},
						publisher = {IEEE Computer Society},
						address   = {Los Alamitos, CA, USA},
					}
					

"Towards an integrated multiscale simulation of turbulent clouds on PetaScale computers". Lian-Ping Wang, Orlando Ayala, Hossein Parishani, Wojciech W Grabowski, Andrzej A Wyszogrodzki, Zbigniew Piotrowski, Guang R Gao, Chandra Kambhamettu, Xiaoming Li, Louis Rossi, Daniel Orozco and Claudio Torres. Proceedings of the 13th European Turbulence Conference (ETC13). Warsaw, Poland, September, 2011.

"A Code Merging Optimization Technique for GPGPU", Ryan Taylor and Xiaoming Li. Proceedings of The 24th International Workshop on Languages and Compilers for Parallel Computing (LCPC 2011), Fort Collins, Colorado, USA, September, 2011. [bib]

					@incollection{taylor2011code,
						author    = {Taylor, Ryan and Li, Xiaoming},
						title     = {A Code Merging Optimization Technique for {GPU}},
						booktitle = {Languages and Compilers for Parallel Computing (LCPC)},
						editor    = {Rajopadhye, Sanjay and Strout, Michelle Mills},
						series    = {Lecture Notes in Computer Science},
						volume    = {7146},
						pages     = {218--236},
						publisher = {Springer Berlin Heidelberg},
						year      = {2013},
						isbn      = {978-3-642-36035-0},
						doi       = {10.1007/978-3-642-36036-7_15},
						url       = {http://dx.doi.org/10.1007/978-3-642-36036-7_15},
					}
					

"Using GPUs to Compute Large Out-of-card FFTs", Liang Gu, Jakob Siegel and Xiaoming Li. 25th International Conference on Supercomputing (ICS 2011), Tucson, Arizona, USA, June, 2011. [bib]

					@inproceedings{gu2011using,
						author    = {Gu, Liang and Siegel, Jakob and Li, Xiaoming},
						title     = {Using {GPUs} to Compute Large Out-of-card {FFTs}},
						booktitle = {Proceedings of the International Conference on Supercomputing},
						series    = {ICS '11},
						year      = {2011},
						isbn      = {978-1-4503-0102-2},
						location  = {Tucson, Arizona, USA},
						pages     = {255--264},
						numpages  = {10},
						url       = {http://doi.acm.org/10.1145/1995896.1995937},
						doi       = {10.1145/1995896.1995937},
						acmid     = {1995937},
						publisher = {ACM},
						address   = {New York, NY, USA},
						keywords  = {fft dft library gpu cuda},
					}
					

"Soft Error Propagation in Floating-Point Programs", Sha Li and Xiaoming Li. Proceedings of International Performance Computing and Communications Conference (IPCCC 2010), Albuquerque, New Mexico, USA, December, 2010.

"Efficient Sparse Matrix-Matrix Multiplication on Heterogeneous High Performance Systems", Jakob Siegel, Oreste Villa, Sriram Krishnamoorthy, Antonino Tumeo and Xiaoming Li. Proceedings of The Workshop on Application/Architecture Co-design for Extreme-scale Computing (AACEC) in conjunction with the IEEE International Conference on Cluster Computing 2010 (Cluster 2010). Crete, Greece, September 2010.

"A Micro-benchmark Suite for AMD GPUs", Ryan Taylor and Xiaoming Li. Proceedings of the Third International Workshop on Parallel Programming Models and Systems Software for High-End Computing (P2S2) in conjunction with The 39th International Conference on Parallel Processing (ICPP'10), San Diego, CA, September 2010. [bib]

					@inproceedings{taylor2010micro,
						author    = {Taylor, Ryan and Li, Xiaoming},
						title     = {A Micro-benchmark Suite for {AMD} {GPUs}},
						booktitle = {Third International Workshop on Parallel Programming Models and Systems Software for High-End Computing (P2S2), Parallel Processing Workshops (ICPPW), 2010 39th International Conference on},
						year      = {2010},
						month     = sep,
						pages     = {387--396},
						doi       = {10.1109/ICPPW.2010.59},
						issn      = {1530-2016},
					}
				

"Software-based predication for AMD GPUs", Ryan Taylor, Xiaoming Li. Proceedings of International Workshop on Highly-Efficient Accelerators and Reconfigurable Technologies (HEART) in conjunction with The 24th International Conference on Supercomputing (ICS'10), Tsukuba, Japan, June, 2010. [bib]

					@article{taylor2010software,
						author     = {Taylor, Ryan and Li, Xiaoming},
						title      = {Software-based Branch Predication for {AMD} {GPUs}},
						journal    = {ACM SIGARCH Computer Architecture News},
						issue_date = {September 2010},
						volume     = {38},
						number     = {4},
						month      = jan,
						year       = {2011},
						issn       = {0163-5964},
						pages      = {66--72},
						numpages   = {7},
						url        = {http://doi.acm.org/10.1145/1926367.1926379},
						doi        = {10.1145/1926367.1926379},
						acmid      = {1926379},
						publisher  = {ACM},
						address    = {New York, NY, USA},
					}

"An Empirically Tuned 2D and 3D FFT Library on CUDA GPU", Liang Gu, Xiaoming Li and Jakob Siegel. Proceedings of International Conference on Supercomputing (ICS 2010). Tsukuba, Japan. June, 2010. [bib]

					@inproceedings{gu2010empirically,
						author    = {Gu, Liang and Li, Xiaoming and Siegel, Jakob},
						title     = {An Empirically Tuned {2D} and {3D} {FFT} Library on {CUDA} {GPU}},
						booktitle = {Proceedings of the 24th ACM International Conference on Supercomputing},
						series    = {ICS '10},
						year      = {2010},
						isbn      = {978-1-4503-0018-6},
						location  = {Tsukuba, Ibaraki, Japan},
						pages     = {305--314},
						numpages  = {10},
						url       = {http://doi.acm.org/10.1145/1810085.1810127},
						doi       = {10.1145/1810085.1810127},
						acmid     = {1810127},
						publisher = {ACM},
						address   = {New York, NY, USA},
						keywords  = {2D FFT, 3D FFT, CUDA, GPU, empirical tuning, library generation},
					}
				

"Context-aware Code Optimization", Murat Bolat and Xiaoming Li. Proceedings of International Performance Computing and Communications Conference (IPCCC 2009), Phoenix, Arizona, USA, December, 2009. [bib]

					@inproceedings{bolat2009context,
						author    = {Bolat, Murat and Li, Xiaoming},
						title     = {Context-aware code optimization},
						booktitle = {Performance Computing and Communications Conference (IPCCC), 2009 IEEE 28th International},
						year      = {2009},
						month     = dec,
						pages     = {256--263},
						keywords  = {optimisation;program compilers;ubiquitous computing;SPEC2000 benchmarks;SPEC2006 benchmarks;block sequence;context-aware code optimization;execution frequency;feedback-driven program optimization technique;program compilation technique;program segment;program source code;single code segment;Benchmark testing;Computer displays;Dynamic compiler;Frequency;Optimization methods;Runtime;Search methods},
						doi       = {10.1109/PCCC.2009.5403838},
						issn      = {1097-2641},
					}
				

"Iterative Layer-Based Raytracing on CUDA", Alejandro Segovia, Xiaoming Li and Guang Gao. Proceedings of International Performance Computing and Communications Conference (IPCCC 2009), Phoenix, Arizona, USA, December, 2009. [bib]

					@inproceedings{segovia2009iterative,
						author    = {Segovia, Alejandro and Li, Xiaoming and Gao, Guang},
						title     = {Iterative layer-based raytracing on {CUDA}},
						booktitle = {Performance Computing and Communications Conference (IPCCC), 2009 IEEE 28th International},
						year      = {2009},
						month     = dec,
						pages     = {248--255},
						keywords  = {parallel algorithms;ray tracing;recursive estimation;NVIDIA CUDA architecture;image generation process;iterative layer-based raytracing algorithm;parallel algorithm;raytracer algorithm;recursive algorithm;recursive function calls;sequential algorithm;Algorithm design and analysis;Application software;Cameras;Computer architecture;Image generation;Iterative algorithms;Layout;Pixel;Power generation;Rendering (computer graphics)},
						doi       = {10.1109/PCCC.2009.5403843},
						issn      = {1097-2641},
					}
				

"DFT Performance Prediction in FFTW", Liang Gu and Xiaoming Li. Proceedings of Languages and Compilers for Parallel Computing, 22nd International Workshop, (LCPC 2009), Newark, Delaware, USA, October, 2009. [bib]

					@incollection{gu2010dft,
						author    = {Gu, Liang and Li, Xiaoming},
						title     = {{DFT} Performance Prediction in {FFTW}},
						booktitle = {Languages and Compilers for Parallel Computing},
						editor    = {Gao, Guang R. and Pollock, Lori L. and Cavazos, John and Li, Xiaoming},
						series    = {Lecture Notes in Computer Science},
						volume    = {5898},
						pages     = {140--156},
						publisher = {Springer Berlin Heidelberg},
						year      = {2010},
						isbn      = {978-3-642-13373-2},
						doi       = {10.1007/978-3-642-13374-9_10},
						url       = {http://dx.doi.org/10.1007/978-3-642-13374-9_10},
					}
				

"CUDA Memory Optimizations for Large Data-Structures in the Gravit Simulator", Jakob Siegel, Juergen Ributzka and Xiaoming Li. International Workshop on Simulation and Modelling. Proceedings of The 38th International Conference on Parallel Processing (ICPP) 2009, September 2009. [bib]

					@inproceedings{siegel2009cuda,
						author    = {Siegel, Jakob and Ributzka, Juergen and Li, Xiaoming},
						title     = {{CUDA} Memory Optimizations for Large Data-Structures in the {Gravit} Simulator},
						booktitle = {Parallel Processing Workshops, 2009. ICPPW '09. International Conference on},
						year      = {2009},
						month     = sep,
						pages     = {174--181},
						keywords  = {computer graphics;coprocessors;data structures;general purpose computers;gravity;memory cards;optimisation;parallel algorithms;CUDA memory optimizations;GPU access patterns;Gravit simulator;data structures;embarassingly parallel algorithms;general purpose CPU;gravitational forces;memory usage;program optimization;Acceleration;Computational modeling;Computer simulation;Graphics;Hardware;Parallel algorithms;Parallel processing;Programming profession;Scientific computing;Yarn;CUDA;GPGPU;memory layout;n-body;optimization},
						doi       = {10.1109/ICPPW.2009.78},
						issn      = {1530-2016},
					}
				

"An Empirically Optimized Radix Sort for GPU", Bonan Huang, Jinlan Gao and Xiaoming Li. Proceedings of the IEEE International Symposium on Parallel and Distributed Processing with Applications (ISPA) 2009, August, 2009. [bib]

					@inproceedings{huang2009empirically,
						author    = {Huang, Bonan and Gao, Jinlan and Li, Xiaoming},
						title     = {An Empirically Optimized Radix Sort for {GPU}},
						booktitle = {Proceedings of the {IEEE} International Symposium on Parallel and Distributed Processing with Applications ({ISPA})},
						year      = {2009},
						month     = aug,
						pages     = {234--241},
						isbn      = {978-0-7695-3747-4},
						doi       = {10.1109/ISPA.2009.89},
						publisher = {IEEE Computer Society},
						address   = {Los Alamitos, CA, USA},
					}
				

"A Model-driven Optimization for FFTW", Liang Gu and Xiaoming Li. Poster. Proceedings of the 23rd International Conference on Supercomputing (ICS) 2009, June, 2009.

"A Control-structure Splitting Optimization for GPGPU", Snaider Carrillo, Jakob Siegel and Xiaoming Li, Proceedings of ACM International Conference on Computing Frontiers (CF) 2009, March, 2009. [bib]

					@inproceedings{carrillo2009control,
						author    = {Carrillo, Snaider and Siegel, Jakob and Li, Xiaoming},
						title     = {A Control-structure Splitting Optimization for {GPGPU}},
						booktitle = {Proceedings of the 6th ACM Conference on Computing Frontiers},
						series    = {CF '09},
						year      = {2009},
						isbn      = {978-1-60558-413-3},
						location  = {Ischia, Italy},
						pages     = {147--150},
						numpages  = {4},
						url       = {http://doi.acm.org/10.1145/1531743.1531766},
						doi       = {10.1145/1531743.1531766},
						acmid     = {1531766},
						publisher = {ACM},
						address   = {New York, NY, USA},
						keywords  = {cuda, gpgpu, optimizations},
					}
					

"Dynamic Optimization Option Search in GCC", Eunjung Park, Mihailo Kaplarevic, Yingping Zhang, Xiaoming Li and Guang R. Gao, GCC Developers’ Summit, July, 2007. [bib]

					@inproceedings{park2007dynamic,
						author    = {Park, Eunjung and Kaplarevic, Mihailo and Zhang, Yingping and Li, Xiaoming and Gao, Guang R.},
						title     = {Dynamic Optimization Option Search in {GCC}},
						booktitle = {{GCC} Developers' Summit},
						pages     = {153--162},
						year      = {2007},
						month     = jul # "~18--20",
					}
				

"Automatic Program Segment Similarity Detection in Targeted Program Performance Improvement", Haiping Wu, Eunjung Park, Mihailo Kaplarevic, Yingping Zhang, Murat Bolat, Xiaoming Li, Guang R. Gao, Workshop on Performance Optimization for High-Level Languages and Libraries, in conjunction with 21st IEEE International Parallel & Distributed Processing Symposium (IPDPS). March 2007. [bib]

					@inproceedings{wu2007automatic,
						author    = {Wu, Haiping and Park, Eunjung and Kaplarevic, Mihailo and Zhang, Yingping and Bolat, Murat and Li, Xiaoming and Gao, Guang R.},
						title     = {Automatic Program Segment Similarity Detection in Targeted Program Performance Improvement},
						booktitle = {Parallel and Distributed Processing Symposium, 2007. IPDPS 2007. IEEE International},
						year      = {2007},
						month     = mar,
						pages     = {1--8},
						keywords  = {optimising compilers;program diagnostics;software performance evaluation;GCC 3.3;Intel XScale PXA255 platform;architecture-dependent behavior similarity;automatic performance sensitive program segment detection;optimization compiler;program performance improvement;program segment optimization;program segment similarity detection;proxy segment template database;syntax structure;Data structures;Databases;High performance computing;Kernel;Learning systems;Optimization methods;Optimizing compilers;Program processors},
						doi       = {10.1109/IPDPS.2007.370642},
					}
				

"Experience of Optimizing FFT on Intel Architectures", Daniel Orozco, Liping Xue, Murat Bolat, Xiaoming Li, Guang R. Gao. Workshop on Performance Optimization for High-Level Languages and Libraries, in conjunction with 21st IEEE International Parallel & Distributed Processing Symposium (IPDPS). March 2007. [bib]

				@inproceedings{orozco2007fft,
					author    = {Orozco, Daniel and Xue, Liping and Bolat, Murat and Li, Xiaoming and Gao, Guang R.},
					title     = {Experience of Optimizing {FFT} on {Intel} Architectures},
					booktitle = {Parallel and Distributed Processing Symposium, 2007. IPDPS 2007. IEEE International},
					year      = {2007},
					month     = mar,
					pages     = {1--8},
					keywords  = {computer architecture;fast Fourier transforms;mathematics computing;optimising compilers;software libraries;FFT code optimization;FFT micro kernel optimization;Intel computer architecture;automatic library generator;code generation;hardware counter;memory hierarchy locality enhancement;Art;Assembly;Computer architecture;Counting circuits;Documentation;Hardware;Kernel;Prefetching;Software libraries;Spirals},
					doi       = {10.1109/IPDPS.2007.370638},
				}
				

"Analyzing the Use of a Software Modeling Tool". Xiaoming Li, Daryl Shannon, Jabari Walker, Sarfraz Khurshid, Darko Marinov. The Sixth Workshop on Language Descriptions, Tools and Applications (LDTA 2006). April 2006. [bib]

					@inproceedings{li2006analyzing,
						author    = {Li, Xiaoming and Shannon, Daryl and Walker, Jabari and Khurshid, Sarfraz and Marinov, Darko},
						title     = {Analyzing the Use of a Software Modeling Tool},
						booktitle = {The Sixth Workshop on Language Descriptions, Tools and Applications (LDTA 2006)},
						year      = {2006},
						month     = apr,
					}
				

"Optimizing Sorting with Genetic Algorithms". Xiaoming Li, María Jesús Garzarán, and David Padua. In Proc. of the 3rd International Symposium on Code Generation and Optimization (CGO-2005), pages 99-110, San Jose, CA, USA, 2005. [bib]

				@inproceedings{li2005optimizing,
					title     = {Optimizing Sorting with Genetic Algorithms},
					author    = {Li, Xiaoming and Garzaran, Maria Jesus and Padua, David},
					year      = {2005},
					booktitle = {Proceedings of the International Symposium on Code Generation and Optimization},
					series    = {CGO '05},
					publisher = {IEEE Computer Society},
					address   = {Washington, DC, USA},
					pages     = {99--110},
					numpages  = {12},
					isbn      = {0-7695-2298-X},
					doi       = {10.1109/CGO.2005.24},
					url       = {http://dx.doi.org/10.1109/CGO.2005.24},
					acmid     = {1048979},
				}
				

"Is Search Really Necessary to Generate High-Performance BLAS?". Kamen Yotov, Xiaoming Li, Gang Ren, Maria Garzaran, David Padua, Keshav Pingali and Paul Stodghill. Proceedings of the IEEE Special Issue on Program Generation, Optimization, and Platform Adaptation, Vol. 93, No. 2, pages 358-386, February, 2005. [bib]

				@article{yotovli2005search,
					author   = {Yotov, Kamen and Li, Xiaoming and Ren, Gang and Garzaran, Maria Jesus and Padua, David and Pingali, Keshav and Stodghill, Paul},
					title    = {Is Search Really Necessary to Generate High-Performance {BLAS}?},
					journal  = {Proceedings of the IEEE},
					year     = {2005},
					month    = feb,
					volume   = {93},
					number   = {2},
					pages    = {358--386},
					keywords = {linear algebra;mathematics computing;optimising compilers;software libraries;ATLAS;BLAS;Basic Linear Algebra Subprograms;code generation;global search engine;library generators;model driven optimization engine;program compilers;program optimization;search based empirical optimization;Analytical models;Computer science;Hardware;Libraries;Linear algebra;Optimizing compilers;Program processors;Programming profession;Search engines;Tiles;Basic Linear Algebra Subprograms (BLAS);compilers;empirical optimization;high-performance computing;library generators;model-driven optimization;program optimization},
					doi      = {10.1109/JPROC.2004.840444},
					issn     = {0018-9219},
				}
				

"Optimizing Sorting with Genetic Algorithm". Xiaoming Li, María Jesús Garzarán, and David Padua. The 12th International Workshop on Compilers for Parallel Computers (CPC 2006), A Coruna, Spain, January, 2006. (Invited paper).

"Optimizing Matrix Multiplication with a Classifier Learning System". Xiaoming Li and María Jesús Garzarán. Languages and Compilers for Parallel Computing, 16th International Workshop, (LCPC 2005), New York, NY, USA, 2005. [bib]

				@incollection{li2006optimizing,
					author    = {Li, Xiaoming and Garzaran, Maria Jesus},
					title     = {Optimizing Matrix Multiplication with a Classifier Learning System},
					booktitle = {Languages and Compilers for Parallel Computing},
					editor    = {Ayguad{\'e}, Eduard and Baumgartner, Gerald and Ramanujam, J. and Sadayappan, P.},
					series    = {Lecture Notes in Computer Science},
					volume    = {4339},
					pages     = {121--135},
					publisher = {Springer Berlin Heidelberg},
					year      = {2006},
					isbn      = {978-3-540-69329-1},
					doi       = {10.1007/978-3-540-69330-7_9},
					url       = {http://dx.doi.org/10.1007/978-3-540-69330-7_9},
				}
				

"Analytic Models and Empirical Search: A Hybrid Approach to Code Optimization". Arkady Epshteyn, María Jesús Garzarán, Gerald DeJong, David Padua, Gang Ren, Xiaoming Li, Kamen Yotov and Keshav Pingali. Languages and Compilers for Parallel Computing, 16th International Workshop, (LCPC 2005), New York, NY, USA, 2005. [bib]

				@incollection{epshteyn2006analytic,
					author    = {Epshteyn, Arkady and Garzar{\'a}n, Mar{\'\i}a Jes{\'u}s and DeJong, Gerald and Padua, David and Ren, Gang and Li, Xiaoming and Yotov, Kamen and Pingali, Keshav},
					title     = {Analytic Models and Empirical Search: A Hybrid Approach to Code Optimization},
					booktitle = {Languages and Compilers for Parallel Computing},
					editor    = {Ayguad{\'e}, Eduard and Baumgartner, Gerald and Ramanujam, J. and Sadayappan, P.},
					series    = {Lecture Notes in Computer Science},
					volume    = {4339},
					pages     = {259--273},
					publisher = {Springer Berlin Heidelberg},
					year      = {2006},
					isbn      = {978-3-540-69329-1},
					doi       = {10.1007/978-3-540-69330-7_18},
					url       = {http://dx.doi.org/10.1007/978-3-540-69330-7_18},
				}
				

"A Dynamically Tuned Sorting Library". Xiaoming Li, María Jesús Garzarán, and David Padua. In Proc. of the International Symposium on Code Generation and Optimization (CGO-2004), pages 111-124, March 2004. [bib]

				@inproceedings{li2004dynamically,
					author    = {Li, Xiaoming and Garzar{\'a}n, Mar{\'\i}a Jes{\'u}s and Padua, David},
					title     = {A Dynamically Tuned Sorting Library},
					booktitle = {Proceedings of the International Symposium on Code Generation and Optimization: Feedback-directed and Runtime Optimization},
					series    = {CGO '04},
					year      = {2004},
					isbn      = {0-7695-2102-9},
					location  = {Palo Alto, California},
					pages     = {111--124},
					url       = {http://dl.acm.org/citation.cfm?id=977395.977663},
					acmid     = {977663},
					publisher = {IEEE Computer Society},
					address   = {Washington, DC, USA},
				}
				

"A Comparison of Empirical and Model-driven Optimization". Kamen Yotov, Xiaoming Li, Gang Ren, Michael Cibulskis, Gerald DeJong, María Jesús Garzarán, David Padua, Keshav Pingali, Paul Stodghill, and Peng Wu. In Proc. of the International Conference on Programming Language Design and Implementation (PLDI 2003), pages 63-76, June 2003. [bib]

				@inproceedings{yotovli2003comparison,
					title     = {A Comparison of Empirical and Model-driven Optimization},
					author    = {Yotov, Kamen and Li, Xiaoming and Ren, Gang and Cibulskis, Michael and DeJong, Gerald and Garzaran, Maria and Padua, David and Pingali, Keshav and Stodghill, Paul and Wu, Peng},
					year      = {2003},
					booktitle = {Proceedings of the ACM SIGPLAN 2003 Conference on Programming Language Design and Implementation},
					series    = {PLDI '03},
					location  = {San Diego, California, USA},
					publisher = {ACM},
					address   = {New York, NY, USA},
					pages     = {63--76},
					numpages  = {14},
					isbn      = {1-58113-662-5},
					doi       = {10.1145/781131.781140},
					url       = {http://doi.acm.org/10.1145/781131.781140},
					acmid     = {781140},
					keywords  = {BLAS, blocking, code generation, compilers, empirical optimization, memory hierarchy, model-driven optimization, program transformation, tiling, unrolling},
				}
				

"Data Dependence Analysis In Presence Of Inheritance and Polymorphism". Xiaoming Li, Daoxu Chen, Li Xie. Proceedings of HPC-Asia2000, Vol. 1, pages 220-228, IEEE Computer Society Press, Beijing, May 2000. [bib]

				@inproceedings{li2000dependence,
					author    = {Li, Xiaoming and Chen, Daoxu and Xie, Li},
					title     = {Data dependence analysis in presence of inheritance and polymorphism},
					booktitle = {High Performance Computing in the Asia-Pacific Region, 2000. Proceedings. The Fourth International Conference/Exhibition on},
					year      = {2000},
					month     = may,
					volume    = {1},
					pages     = {220--228},
					keywords  = {inheritance;object-oriented languages;object-oriented programming;parallelising compilers;ODAM;automatic parallelizing compilers;data dependence analysis;data structures;dependence analysis;inheritance;object hierarchy graph;object-oriented data dependence analysis model;object-oriented language;pointer;polymorphism;program design;read-write se;software engineering},
					doi       = {10.1109/HPC.2000.846548},
				}
				

"The Design and Implementation of The Scheduling Protocol in JAPS". Xiaoming Li, Daoxu Chen, Li Xie. Journal of Computer Science (Chinese), Vol. 28, No. 1, January 2001.

"PTSP: The Parallel Task Support Platform in JAPS". Xiaoming Li, Daoxu Chen, Li Xie. Journal of Computer Science (Chinese), Vol. 27, No. 7, pages 5-8, July 2000.