@Article{ Garcke.Griebel:2002,
author = {J. Garcke and M. Griebel},
title = {Classification with sparse grids using simplicial basis
functions},
journal = {Intelligent Data Analysis},
year = {2002},
volume = {6},
number = {6},
pages = {483--502},
http = {http://iospress.metapress.com/openurl.asp?genre=article&issn=1088-467X&volume=6&issue=6&spage=483}
,
note = {(shortened version appeared in KDD 2001, Proc. of the
Seventh ACM SIGKDD, F. Provost and R. Srikant (eds.), 87-96, ACM, 2001)},
ps = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/ida2002.ps.gz}
,
pdf = {http://wissrech.ins.uni-bonn.de/research/pub/garcke/ida2002.pdf}
,
abstract = {Recently we presented a new approach
\cite{Garcke.Griebel.Thess:2000} to the classification
problem arising in data mining. It is based on the
regularization network approach but in contrast to other
methods, which employ ansatz functions associated to data
points, we use a grid in the usually high-dimensional
feature space for the minimization process. To cope with
the curse of dimensionality, we employ sparse grids
\cite{Zenger:1991}. Thus, only $O(h_n^{-1} n^{d-1})$
instead of $O(h_n^{-d})$ grid points and unknowns are
involved. Here $d$ denotes the dimension of the feature
space and $h_n = 2^{-n}$ gives the mesh size. We use the
sparse grid combination technique
\cite{Griebel.Schneider.Zenger:1992} where the
classification problem is discretized and solved on a
sequence of conventional grids with uniform mesh sizes in
each dimension. The sparse grid solution is then obtained
by linear combination.
The method computes a nonlinear classifier but scales only
linearly with the number of data points and is well suited
for data mining applications where the amount of data is
very large, but where the dimension of the feature space is
moderately high. In contrast to our former work, where
$d$-linear functions were used, we now apply linear basis
functions based on a simplicial discretization. This allows
to handle more dimensions and the algorithm needs less
operations per data point. We further extend the method to
so-called anisotropic sparse grids, where now different
a-priori chosen mesh sizes can be used for the
discretization of each attribute. This can improve the run
time of the method and the approximation results in the
case of data sets with different importance of the
attributes.
We describe the sparse grid combination technique for the
classification problem, give implementational details and
discuss the complexity of the algorithm. It turns out that
the method scales linearly with the number of given data
points. Finally we report on the quality of the classifier
built by our new method on data sets with up to 14
dimensions. We show that our new method achieves
correctness rates which are competitive to those of the
best existing methods. },
annote = {article,data}
}