% Garcke and Griebel: sparse-grid data mining with simplicial basis functions
% (paper in the Seventh ACM SIGKDD conference proceedings, 2001).
% The fields http, ps, pdf, optnote and abstract are not standard BibTeX fields;
% standard styles ignore them, and they are kept here unchanged in name.
% The doi field holds the bare DOI (no resolver prefix) so styles can build the link.
@InProceedings{ Garcke.Griebel:2001,
  author    = {Garcke, J. and Griebel, M.},
  title     = {Data mining with sparse grids using simplicial basis functions},
  booktitle = {Proceedings of the Seventh {ACM} {SIGKDD} International
               Conference on Knowledge Discovery and Data Mining,
               San Francisco, USA},
  editor    = {Provost, F. and Srikant, R.},
  pages     = {87--96},
  year      = {2001},
  doi       = {10.1145/502512.502528},
  http      = {http://doi.acm.org/10.1145/502512.502528},
  ps        = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/kdd.ps.gz},
  pdf       = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/kdd.pdf},
  optnote   = {also as SFB 256 Preprint 713, Universit{\"a}t Bonn, 2001},
  annote    = {proc_ref},
  abstract  = {Recently we presented a new approach to the classification
               problem arising in data mining. It is based on the
               regularization network approach but, in contrast to other
               methods which employ ansatz functions associated to data
               points, we use a grid in the usually high-dimensional feature
               space for the minimization process. To cope with the curse of
               dimensionality, we employ sparse grids. Thus, only
               $O(h_n^{-1} n^{d-1})$ instead of $O(h_n^{-d})$ grid points and
               unknowns are involved. Here $d$ denotes the dimension of the
               feature space and $h_n = 2^{-n}$ gives the mesh size. We use
               the sparse grid combination technique where the classification
               problem is discretized and solved on a sequence of conventional
               grids with uniform mesh sizes in each dimension. The sparse
               grid solution is then obtained by linear combination. In
               contrast to our former work, where $d$-linear functions were
               used, we apply now linear basis functions based on a simplicial
               discretization. These allow to handle more dimensions and the
               algorithm needs less operations per data point. We describe
               the sparse grid combination technique for the classification
               problem, give implementational details and discuss the
               complexity of the algorithm. It turns out that the method
               scales linear with the number of given data points.
               Finally we report on the quality of the classifier built by our
               new method on data sets in up to 10 dimensions. It turns out
               that our new method achieves correctness rates which are
               competitive to that of the best existing methods.}
}