% Conference paper (ACM SIGKDD 2001) on sparse-grid classification with
% simplicial basis functions.  The fields `http', `ps', `pdf' and `optnote'
% are not standard BibTeX fields; standard styles ignore them
% (NOTE(review): presumably consumed by a separate publication-list tool).
@InProceedings{ Garcke.Griebel:2001,
  author    = {Garcke, J. and Griebel, M.},
  title     = {Data mining with sparse grids using simplicial basis
               functions},
  booktitle = {Proceedings of the Seventh ACM SIGKDD International
               Conference on Knowledge Discovery and Data Mining, San
               Francisco, USA},
  pages     = {87--96},
  editor    = {Provost, F. and Srikant, R.},
  optnote   = {also as SFB 256 Preprint 713, Universit{\"a}t Bonn, 2001},
  http      = {http://doi.acm.org/10.1145/502512.502528},
  doi       = {10.1145/502512.502528},
  ps        = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/kdd.ps.gz},
  pdf       = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/kdd.pdf},
  abstract  = {Recently we presented a new approach to the classification
               problem arising in data mining. It is based on the
               regularization network approach but, in contrast to other
               methods which employ ansatz functions associated to data
               points, we use a grid in the usually high-dimensional
               feature space for the minimization process. To cope with
               the curse of dimensionality, we employ sparse grids. Thus,
               only $O(h_n^{-1} n^{d-1})$ instead of $O(h_n^{-d})$ grid
               points and unknowns are involved. Here $d$ denotes the
               dimension of the feature space and $h_n = 2^{-n}$ gives the
               mesh size. We use the sparse grid combination technique
               where the classification problem is discretized and solved
               on a sequence of conventional grids with uniform mesh sizes
               in each dimension. The sparse grid solution is then
               obtained by linear combination. In contrast to our former
               work, where $d$-linear functions were used, we apply now
               linear basis functions based on a simplicial
               discretization. These allow to handle more dimensions and
               the algorithm needs less operations per data point.
               We describe the sparse grid combination technique for the
               classification problem, give implementational details and
               discuss the complexity of the algorithm. It turns out that
               the method scales linear with the number of given data
               points. Finally we report on the quality of the classifier
               built by our new method on data sets in up to 10
               dimensions. It turns out that our new method achieves
               correctness rates which are competitive to that of the best
               existing methods.},
  year      = {2001},
  annote    = {proc_ref}
}