@Article{	  Garcke.Griebel:2002,
  author	= {J. Garcke and M. Griebel},
  title		= {Classification with sparse grids using simplicial basis
		  functions},
  journal	= {Intelligent Data Analysis},
  year		= {2002},
  volume	= {6},
  number	= {6},
  pages		= {483--502},
  http		= {http://iospress.metapress.com/openurl.asp?genre=article&issn=1088-467X&volume=6&issue=6&spage=483}
		  ,
  note		= {(shortened version appeared in KDD 2001, Proc. of the
		  Seventh ACM SIGKDD, F. Provost and R. Srikant (eds.), 87-96, ACM, 2001)},
  ps		= {http://wissrech.ins.uni-bonn.de/research/pub/garcke/ida2002.ps.gz}
		  ,
  pdf		= {http://wissrech.ins.uni-bonn.de/research/pub/garcke/ida2002.pdf}
		  ,
  abstract	= {Recently we presented a new approach
		  \cite{Garcke.Griebel.Thess:2000} to the classification
		  problem arising in data mining. It is based on the
		  regularization network approach but in contrast to other
		  methods, which employ ansatz functions associated to data
		  points, we use a grid in the usually high-dimensional
		  feature space for the minimization process. To cope with
		  the curse of dimensionality, we employ sparse grids
		  \cite{Zenger:1991}. Thus, only $O(h_n^{-1} n^{d-1})$
		  instead of $O(h_n^{-d})$ grid points and unknowns are
		  involved. Here $d$ denotes the dimension of the feature
		  space and $h_n = 2^{-n}$ gives the mesh size. We use the
		  sparse grid combination technique
		  \cite{Griebel.Schneider.Zenger:1992} where the
		  classification problem is discretized and solved on a
		  sequence of conventional grids with uniform mesh sizes in
		  each dimension. The sparse grid solution is then obtained
		  by linear combination.
		  
		  The method computes a nonlinear classifier but scales only
		  linearly with the number of data points and is well suited
		  for data mining applications where the amount of data is
		  very large, but where the dimension of the feature space is
		  moderately high. In contrast to our former work, where
		  $d$-linear functions were used, we now apply linear basis
		  functions based on a simplicial discretization. This allows
		  to handle more dimensions and the algorithm needs less
		  operations per data point. We further extend the method to
		  so-called anisotropic sparse grids, where now different
		  a-priori chosen mesh sizes can be used for the
		  discretization of each attribute. This can improve the run
		  time of the method and the approximation results in the
		  case of data sets with different importance of the
		  attributes.
		  
		  We describe the sparse grid combination technique for the
		  classification problem, give implementational details and
		  discuss the complexity of the algorithm. It turns out that
		  the method scales linearly with the number of given data
		  points. Finally we report on the quality of the classifier
		  built by our new method on data sets with up to 14
		  dimensions. We show that our new method achieves
		  correctness rates which are competitive to those of the
		  best existing methods. },
  annote	= {article,data}
}