%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Kenneth Clarkson at 2014-03-19 19:01:14 -0700
%% Saved with string encoding Unicode (UTF-8)
@string{acmcs = {ACM Computing Surveys}}
@string{dcg = {Discrete and Computational Geometry}}
@string{focs = {Symposium on Foundations of Computer Science}}
@string{focs90 = {Proc. 31st IEEE Symp. on Foundations of Computer Science}}
@string{form = {short}}
@string{ieee-com = {IEEE Transactions on Communications}}
@string{ieeeit = {IEEE Transactions on Information Theory}}
@string{jacm = {Journal of the ACM}}
@string{jmlr = {Journal of Machine Learning Research}}
@string{pami = {IEEE Transactions on Pattern Analysis and Machine Intelligence}}
@string{prl = {Pattern Recognition Letters}}
@string{proc = {Proceedings of the }}
@string{sicomp = {SIAM Journal on Computing}}
@string{socg = {Annual Symposium on Computational Geometry}}
@string{soda = {Annual ACM-SIAM Symposium on Discrete Algorithms}}
@string{stoc = {Annual ACM Symposium on Theory of Computing}}
@string{talg = {ACM Transactions on Algorithms}}
@techreport{Clarkson77,
Address = {Claremont, CA},
Author = {Kenneth L. Clarkson},
Institution = {Claremont Graduate School},
Title = {Implementation of a Model of ${CO}_2$ Diffusion in the Midtroposphere},
Type = {Technical Report},
Year = {1977}}
@inproceedings{Clarkson81,
Address = {McLean, VA: Science Applications, Inc.},
Author = {Kenneth L. Clarkson},
Booktitle = {Proceedings of the DARPA Image Understanding Workshop},
Title = {A Procedure for Camera Calibration},
Year = {1981}}
@incollection{Clarkson82,
Address = {Los Altos, CA},
Author = {Kenneth L. Clarkson},
Booktitle = {The Handbook of Artificial Intelligence},
Category = {Survey},
Editor = {P. Cohen and E. Feigenbaum},
Publisher = {William Kaufman, Inc.},
Title = {Grammatical Inference},
Year = {1982}}
@article{Clarkson83,
Author = {Kenneth L. Clarkson},
Journal = {Information Processing Letters},
Month = {January},
Pages = {23--25},
Title = {A Modification of the Greedy Algorithm for Vertex Cover},
Volume = {16},
Year = {1983}}
@inproceedings{ann,
Address = {Tucson, AZ},
Author = {Kenneth L. Clarkson},
Booktitle = {FOCS '83: } # proc # {Twenty-Fourth } # focs,
Month = {November},
Note = {Included in PhD Thesis},
Title = {Fast Algorithms for the All Nearest Neighbors Problem},
Url = {thesis_abs.html},
Year = {1983},
Abstract = {
We present new algorithms for the all nearest neighbors problem:
Given a set `S` of `n` sites (points) in `d`-dimensional space, find the
nearest neighbors in set `S` of each site in `S`.
Our results:
- An algorithm for solving the all nearest neighbors problem in `O(n\log\sigma)`
time, where `\sigma` is the ratio of the distance between the farthest pair
of sites to the distance between the closest pair of sites. A similar
algorithm is described for finding all `k`'th nearest neighbors.
- An algorithm for building a celltree, a compressed form of digital trie,
in `O(n\log n)` probabilistic time. The logarithm, floor, and bitwise
exclusive-or functions are assumed available at unit cost.
- An algorithm for solving the all nearest neighbors problem in `O(n)` worst-case
time, given a celltree for the sites.
- An algorithm for building a celltree in linear expected time, assuming the
sites are independently identically distributed random variables, with
an unknown probability density function obeying some very weak conditions.
},
Annote = {
A "celltree" is now more commonly
called a "compressed quadtree" or "compressed hyperoctree", and this paper is apparently
the first appearance of such a construction. Vaidya
refined the
algorithm here to avoid randomness and bit-twiddling, at some cost
in dependence on `d`; his algorithm, and this one, and
that of Gabow, Bentley, and Tarjan, all use the same basic
geometrical observation, which implies that the total
number of nearest neighbors is `O(n)`, even up to approximate neighbors.
Callahan and Kosaraju
used similar ideas for "well-separated pairs decomposition".
The `\sigma` ratio is now more commonly called the spread.
},
Bdsk-Url-1 = {thesis_abs.html}}
@inproceedings{Clarkson84,
Address = {Washington, DC},
Author = {Kenneth L. Clarkson},
Booktitle = {STOC '84: } # proc # {Sixteenth } # stoc,
Month = {April},
Note = {Included in PhD Thesis},
Title = {Fast Expected-time and Approximation Algorithms for Geometric Minimum Spanning Trees},
Url = {thesis_abs.html},
Year = {1984},
Bdsk-Url-1 = {thesis_abs.html}}
@phdthesis{Clarkson85,
Author = {Kenneth L. Clarkson},
Month = {January},
School = {Stanford University},
Title = {Algorithms for Closest-point Problems},
Url = {thesis_abs.html},
Year = {1985},
Abstract = {
This dissertation reports a variety of new algorithms for solving
closest-point problems. The input to these algorithms is a set or
sets of points in `d`-dimensional
space, with an associated `L_p` metric. The problems considered
are:
- The all nearest neighbors
problem. For point set `A`, find the nearest neighbors in `A`
of each point in `A`.
- The nearest foreign neighbor
problem. For point sets `A` and `B`, find the closest point
in `B` to each point in `A`.
- The geometric minimum spanning
tree problem. For point set `A`, find the minimum spanning tree
for the complete weighted undirected graph associated with `A`,
where the vertices of the graph
correspond to the points of `A`,
and the weight of an edge is the distance between the points defining
the edge.
These problems arise in routing, statistical classification, data
compression, and other areas. Obvious algorithms for them require
a running time quadratic in `n`,
the number of points in the input. In many cases they can be
solved with algorithms requiring `O(n \log ^{O(1)} n)` time.
In this work, approximation algorithms for some cases of these problems
have been found. For example, for the minimum spanning tree
problem with the `L_1`
metric, an algorithm has been devised that requires `O(n \log^d (1/\rho))`
time to find a spanning
tree with weight within `1+\rho`
of the minimum. Several other algorithms have been found with
time bounds dependent on `\log(1/\rho)`
for attaining error `\rho`.
Algorithms have also been found that require linear expected time, for
independent identically distributed random input points with a
probability density function satisfying weak conditions. One such
algorithm depends on the fact that under certain conditions, values
that are identically distributed, but dependent, can be bucket sorted
in linear expected time.
An algorithm has been found for the all nearest neighbors problem that
requires `O(n \log n)` expected time for any input set
of points, where the expectation is on the random sampling performed by
the algorithm. This algorithm involves the construction of a new
data structure, a compressed form of digital trie.
},
Bdsk-Url-1 = {thesis_abs.html}}
@article{rpo,
Appearedas = {A Probabilistic Algorithm for the Post Office Problem},
Appearedin = {STOC '85: } # proc # {Seventeenth } # stoc,
Author = {Kenneth L. Clarkson},
Ayear = {1985},
Journal = sicomp,
Pages = {830--847},
Pdf = {rpo/p.pdf},
Ps = {rpo/p.ps},
Title = {A Randomized Algorithm for Closest-Point Queries},
Year = {1988},
Abstract = {
An algorithm for closest-point queries is given.
The problem is this: given a set `S` of `n` points in
`d`-dimensional space, build a data structure so that
given an arbitrary query point `p`, a closest point in `S`
to `p` can be found quickly. The measure of distance is
the Euclidean norm. This is sometimes called the
post-office problem [Kn]. The new data structure
will be termed an RPO tree, from Randomized Post
Office. The expected time required to build an RPO tree is
`O(n^{\lceil d/2\rceil (1+ \epsilon )} )`, for any fixed `\epsilon >
0`, and a query can be answered in `O(\log n )` worst-case
time. An RPO tree requires `O(n^{\lceil d/2\rceil (1+ \epsilon )}
)` space in the worst case. The constant factors in
these bounds depend on `d` and `\epsilon`. The bounds
are average-case due to the randomization employed by
the algorithm, and hold for any set of input points.
This result approaches the `\Omega(n^{\lceil d/2\rceil } )` worst-case
time required for any algorithm that constructs the
Voronoi diagram of the input points, and is a considerable
improvement over previous bounds for `d>3`. The main
step of the construction algorithm is the determination
of the Voronoi diagram of a random sample of the sites,
and the triangulation of that diagram.
},
Annote = {
A key result proven and used
is that with high probability, every Delaunay ball of a random
sample of the sites contains few sites of the full set.
This is the first appearance of the kind of analysis done in more generality
here; the analysis essentially reduces to the
observation that there are `r^{O(1)}` balls associated with
a sample of size `r`, that might be Delaunay, and an exponentially
small probability that a given such ball with many points in it
would be Delaunay in the sample, and finally, the union bound.
}}
@article{Clarkson86,
Author = {Kenneth L. Clarkson},
Journal = {Information Processing Letters},
Month = {January},
Pages = {21--24},
Title = {Linear Programming in ${O(n 3^{d^2})}$ Time},
Volume = {22},
Year = {1986}}
@article{rs,
Appearedas = {Further Applications of Random Sampling to Computational Geometry},
Appearedin = {STOC '86: } # proc # {Eighteenth } # stoc,
Author = {Kenneth L. Clarkson},
Ayear = {1986},
Dvi = {rs/p.dvi},
Journal = dcg,
Pages = {195--222},
Pdf = {rs/p.pdf},
Ps = {rs/p.ps},
Title = {New Applications of Random Sampling to Computational Geometry},
Volume = {2},
Year = {1987},
Abstract = {
This paper gives several new demonstrations of
the usefulness of random sampling techniques in
computational geometry. One new algorithm creates a
search structure for arrangements of hyperplanes by
sampling the hyperplanes and using information from
the resulting arrangement to divide and conquer.
This algorithm requires `O(s ^ {d + \epsilon})`
expected preprocessing time to build a search structure
for an arrangement of `s` hyperplanes in `d` dimensions.
The expectation, as with all expected times reported here,
is with respect to the random behavior of the algorithm,
and holds for any input. Given the data structure, and a
query point `p`, the cell of the arrangement containing
`p` can be found in `O( \log s)` worst-case time.
(The bound holds for any fixed `\epsilon >0`, with
the constant factors dependent on `d` and `\epsilon`.)
Using point-plane duality, the algorithm may be used for
answering halfspace range queries. Another algorithm finds
random samples of simplices to determine the separation
distance of two polytopes. The algorithm uses expected
`O(n ^{\lfloor d/2\rfloor} )` time, where `n` is the total number
of vertices of the two polytopes. This matches previous
results [DoK] for the case `d=3` and extends them.
Another algorithm samples points in the plane to determine
their order `k` Voronoi diagram, and requires expected
`O(s^{1+\epsilon}k)` time for `s` points. (It is
assumed that no four of the points are cocircular.)
This sharpens the bound `O(sk ^ 2 \log s)` for Lee's
algorithm [Lee], and `O(s ^ 2 \log s + k(s-k)
\log ^ 2 s)` for Chazelle and Edelsbrunner's algorithm
[ChE]. Finally, random sampling is used to show that
any set of `s` points in `E^3` has `O(sk^2\log ^ 8 s /
( \log \log s) ^ 6 )` distinct `j`-sets with `j\leq k`.
(For `S \subset E^d`, a set `S'\subset S` with `|S'|=j`
is a `j`-set of `S` if there is a halfspace `h^+` with
`S'=S \cap h^+`.) This sharpens with respect to `k` the
previous bound `O(s k ^ 5 )` [ChP]. The proof of
the bound given here is an instance of a "probabilistic
method" [ErS].
},
Annote = {
This paper extends the ideas of an earlier one
to a much more general setting; this framework is slightly less general
than that of range spaces of finite VC-dimension, which were introduced
to geometric algorithms at the
same time by Haussler and Welzl (in the same
issue of the journal).
The scheme here is very close to the
sample compression
framework in the learning theory literature. It is not clear
that there are any natural applications of the VC-dimension for which
this framework is not also applicable.
The framework is much the same as this
later one, which gives bounds that hold on
average; here the bounds hold with high probability, but with an extra factor
of `\log r`. The arrangement search structure uses a construction
that sharpens an idea of Megiddo, and in turn was later sharpened (removing
the `\epsilon`) and called cuttings. The polytope separation distance
algorithm was made obsolete by Gaertner, who gave an `O(n)`-time algorithm
by application and extension of these ideas, and later improvements.
The `k`-set bounds are sharpened here, with
a much cleaner argument. The construction used for the `k`-set bounds,
considered in a dual arrangement of planes, is an approximate level, always
above an `n/r` level and always below a slightly higher level; that is,
it is a crude kind of "shallow cutting".
}}
@article{Guibas87,
Author = {L. J. Guibas and Jorge Stolfi and Kenneth L. Clarkson},
Journal = {Theoretical Computer Science},
Pages = {81--84},
Title = {Solving Related Two and Three Dimensional Linear Programming Problems in Logarithmic Time},
Volume = {49},
Year = {1987}}
@inproceedings{mp,
Address = {New York, New York},
Author = {Kenneth L. Clarkson},
Booktitle = {STOC '87: } # proc # {Nineteenth } # stoc,
Month = {May},
Pdf = {mp/p.pdf},
Ps = {mp/p.ps},
Title = {Approximation Algorithms for Shortest Path Motion Planning},
Year = {1987},
Abstract = {
This paper gives approximation algorithms for solving
the following motion planning problem: Given a set of
polyhedral obstacles and points `s` and `t`, find a
shortest path from `s` to `t` that avoids the obstacles.
The paths found by the algorithms are piecewise linear,
and the length of a path is the sum of the lengths of the
line segments making up the path. Approximation algorithms
will be given for versions of this problem in the plane
and in three-dimensional space. The algorithms return an
`\epsilon`-short path, that is, a path with length within
`(1+\epsilon)` of shortest. Let `n` be the total number of
faces of the polyhedral obstacles, and `\epsilon` a given
value satisfying `0<\epsilon\leq\pi`. The algorithm
for the planar case requires `O(n\log n)/\epsilon`
time to build a data structure of size `O(n/\epsilon)`.
Given points `s` and `t`, an `\epsilon`-short path from `s`
to `t` can be found with the use of the data structure
in time `O(n/\epsilon+n\log n)`. The data structure
is associated with a new variety of Voronoi diagram.
Given obstacles `S\subset E^3` and points `s,t\in E^3`,
an `\epsilon`-short path between `s` and `t` can be found
in
$$O(n^2\lambda(n)\log(n/\epsilon)/\epsilon^4
+n^2\log n\rho\log(n\log \rho))$$
time, where `\rho` is the
ratio of the length of the longest obstacle edge
to the distance between `s` and `t`. The function
`\lambda(n)=\alpha(n)^{O(\alpha(n)^{O(1)})}`,
where the `\alpha(n)` is a form of inverse of
Ackermann's function. For `\log(1/\epsilon)` and
`\log\rho` that are `O(\log n)`, this bound is
`O(n^2(\log^2n)\lambda(n)/\epsilon^4)`.
},
Annote = {
This paper introduces the observation that Yao's fan
of cones can be used to build a spanner.
This observation was independently made by
Keil and Gutwin,
and by Ruppert and Seidel.
The resulting spanners have seen recent (circa 2005)
application in the wireless literature, where they are called
Yao graphs.
}}
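%% A minimal sketch, in Python, of the Yao-graph (cone) spanner construction mentioned
%% in the annotation of the entry above (mp): from each point, keep an edge to its
%% nearest neighbor within each of k equal-angle cones. The parameter k and the
%% brute-force O(n^2 k) loop are illustrative choices, not the paper's construction
%% or data structures.
%%
%% import math
%%
%% def yao_graph(points, k=8):
%%     """Directed Yao graph: from each point, an edge to its nearest
%%     neighbor within each of k equal-angle cones."""
%%     edges = []
%%     for i, (px, py) in enumerate(points):
%%         best = [None] * k                      # nearest neighbor found so far per cone
%%         for j, (qx, qy) in enumerate(points):
%%             if i == j:
%%                 continue
%%             ang = math.atan2(qy - py, qx - px) % (2 * math.pi)
%%             cone = int(ang / (2 * math.pi / k))
%%             dist2 = (qx - px) ** 2 + (qy - py) ** 2
%%             if best[cone] is None or dist2 < best[cone][0]:
%%                 best[cone] = (dist2, j)
%%         edges.extend((i, j) for (_, j) in filter(None, best))
%%     return edges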
@inproceedings{l1mp,
Address = {Waterloo, Ontario},
Author = {Kenneth L. Clarkson and S. Kapoor and P. Vaidya},
Booktitle = {SoCG '87: } # proc # {Third } # socg,
Month = {June},
Ps = {l1mp/p.ps},
Title = {Rectilinear Shortest Paths through Polygonal Obstacles in `{O(n \log^2 n)}` Time},
Year = {1987},
Abstract = {
The problem of finding a rectilinear shortest path
amongst obstacles may be stated as follows: Given a set
of obstacles in the plane find a shortest rectilinear
(`L_1`) path from a point `s` to a point `t` which
avoids all obstacles. The path may touch an obstacle
but may not cross an obstacle. We study the rectilinear
shortest path problem for the case where the obstacles
are non-intersecting simple polygons, and present an `O(
n \log^2 n )` algorithm for finding such a path,
where `n` is the number of vertices of the obstacles. This
algorithm requires `O(n \log n )` space. Another algorithm
is given that requires `O(n ( \log n ) ^{ 3/2} )` time and
space. We also study the case of rectilinear obstacles in
three dimensions, and show that `L_1` shortest paths
can be found in `O( n^2 \log^ 3 n )` time.
}}
@article{Clarkson89,
Author = {Kenneth L. Clarkson},
Journal = {Algorithmica},
Note = {Included in PhD Thesis},
Pages = {461--469},
Title = {An Algorithm for Geometric Minimum Spanning Trees Requiring Nearly Linear Expected Time},
Url = {thesis_abs.html},
Volume = {4},
Year = {1989},
Abstract = {
We describe an algorithm for finding a minimum spanning
tree of the weighted complete graph induced by a set
of `n` points in Euclidean `d`-space. The algorithm
requires nearly linear expected time for points that are
independently uniformly distributed in the unit `d`-cube.
The first step of the algorithm is the spiral search
procedure described by Bentley, Weide, and Yao [BWY] for
finding a supergraph of the MST that has `O(n)` edges.
(The constant factor in the bound depends on `d`.) The next
step is that of sorting the edges of the supergraph by
weight using a radix distribution, or "bucket," sort.
These steps require linear expected time. Finally,
Kruskal's algorithm is used with the sorted edges,
requiring `O(n\alpha(cn,n))` time in the worst case,
with `c>6`. Since the function `\alpha(cn,n)` grows
very slowly, this step requires linear time for all
practical purposes. This result improves the previous best
`O(n\log\log^*n)`, and employs a much simpler algorithm.
Also, this result demonstrates the robustness of bucket
sorting, which requires `O(n)` expected time in this
case despite the probability dependency between the edge
weights.
},
Bdsk-Url-1 = {thesis_abs.html}}
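%% A minimal sketch of the final step described in the abstract above (Clarkson89):
%% Kruskal's algorithm with union-find, run on a candidate edge set assumed to already
%% contain an MST (e.g. the O(n) spiral-search supergraph of [BWY]). An ordinary
%% comparison sort stands in for the paper's bucket sort, and the candidate-generation
%% step itself is not reproduced.
%%
%% def kruskal_mst(n, edges):
%%     """edges: iterable of (weight, u, v) with vertices 0..n-1."""
%%     parent = list(range(n))
%%
%%     def find(x):
%%         while parent[x] != x:
%%             parent[x] = parent[parent[x]]      # path halving
%%             x = parent[x]
%%         return x
%%
%%     mst = []
%%     for w, u, v in sorted(edges):              # the paper bucket-sorts instead
%%         ru, rv = find(u), find(v)
%%         if ru != rv:
%%             parent[ru] = rv                    # union the two components
%%             mst.append((u, v, w))
%%     return mst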
@inproceedings{Clarkson88,
Address = {Urbana, Illinois},
Author = {Kenneth L. Clarkson},
Booktitle = {SoCG '88: } # proc # {Fourth } # socg,
Month = {June},
Title = {Applications of Random Sampling in Computational Geometry, {II}},
Url = {#rs2m},
Year = {1988},
Bdsk-Url-1 = {#rs2m}}
@inproceedings{Clarkson88a,
Address = {Urbana, Illinois},
Author = {Kenneth L. Clarkson and P. W. Shor},
Booktitle = {SoCG '88: } # proc # {Fourth } # socg,
Month = {June},
Title = {Algorithms for Diametral Pairs and Convex Hulls that are Optimal, Randomized, and Incremental},
Url = {#rs2m},
Year = {1988},
Bdsk-Url-1 = {#rs2m}}
@article{rs2m,
Author = {Kenneth L. Clarkson and P. W. Shor},
Journal = dcg,
Note = {Merges two papers above},
Number = {1},
Pages = {387--421},
Pdf = {rs2m/p.pdf},
Ps = {rs2m/p.ps},
Title = {Applications of Random Sampling in Computational Geometry, {II}},
Volume = {4},
Year = {1989},
Abstract = {
We use random sampling for several new geometric
algorithms. The algorithms are "Las Vegas," and their
expected bounds are with respect to the random behavior of
the algorithms. These algorithms follow from new general
results giving sharp bounds for the use of random subsets
in geometric algorithms. These bounds show that random
subsets can be used optimally for divide-and-conquer,
and also give bounds for a simple, general technique for
building geometric structures incrementally. One new
algorithm reports all the intersecting pairs of a set of
line segments in the plane, and requires `O(A+n\log n)`
expected time, where `A` is the number of intersecting
pairs reported. The algorithm requires `O(n)` space in
the worst case. Another algorithm computes the convex
hull of `n` points in `E^d` in `O(n\log n)` expected
time for `d=3`, and `O(n^{\lfloor d/2\rfloor})` expected time for `d>3`.
The algorithm also gives fast expected times for random
input points. Another algorithm computes the diameter
of a set of `n` points in `E^3` in `O(n\log n)` expected
time, and on the way computes the intersection of `n` unit
balls in `E^3`. We show that `O(n\log A)` expected time
suffices to compute the convex hull of `n` points in `E^3`,
where `A` is the number of input points on the surface of
the hull. Algorithms for halfspace range reporting are
also given. In addition, we give asymptotically tight
bounds for `(\le k)`-sets, which are certain halfspace
partitions of point sets, and give a simple proof of Lee's
bounds for high order Voronoi diagrams.
}}
@article{Clarkson89b,
Appearedin = {SoCG '88: } # proc # {Fourth } # socg,
Author = {Kenneth L. Clarkson and Robert E. Tarjan and C. J. Van Wyk},
Ayear = {1988},
Journal = dcg,
Number = {1},
Pages = {423--432},
Title = {A Fast {L}as {V}egas Algorithm for Triangulating a Simple Polygon},
Volume = {4},
Year = {1989},
Abstract = {
We present an algorithm that triangulates a simple
polygon on `n` vertices in `O(n \log ^* n)` expected
time. The algorithm uses random sampling on the
input, and its running time does not depend on any
assumptions about a probability distribution from
which the polygon is drawn.
},
Annote = {
The first algorithm to apply randomization to
the problem, and obtain essentially linear time.
See here also. Chazelle got
rid of the "essentially"
and the randomization, at the cost of some complexity;
Amato et al. took
out some of that complexity, at the cost of putting
the randomization back in.
}}
@article{lp2,
Appearedin = {FOCS '88: } # proc # {Twenty-Ninth } # focs,
Author = {Kenneth L. Clarkson},
Ayear = {1988},
Dvi = {lp/p.dvi},
Journal = jacm,
Number = {2},
Pages = {488--499},
Pdf = {lp/p.pdf},
Ps = {lp/p.ps},
Slides = {lp/talk.pdf},
Title = {{L}as {V}egas algorithms for linear and integer programming when the dimension is small},
Volume = {42},
Year = {1995},
Abstract = {
This paper gives an algorithm for solving linear
programming problems. For a problem with `n` constraints
and `d` variables, the algorithm requires an expected
$$O(d^2n)+(\log n)O(d)^{d/2+O(1)}
+O(d^4\sqrt{n}\log n)$$
arithmetic operations, as `n\rightarrow\infty`.
The constant factors do not depend on `d`. Also, an
algorithm is given for integer linear programming.
Let `\varphi` bound the number of bits required to
specify the rational numbers defining an input constraint
or the objective function vector. Let `n` and `d`
be as before. Then the algorithm requires expected
$$O(2^ddn+8^dd\sqrt{n\ln n}\ln n)
+d^{O(d)}\varphi\ln n$$
operations on numbers with `d^{O(1)}\varphi` bits, as
`n\rightarrow\infty`, where the constant factors do not depend on
`d` or `\varphi`. The expectations are with respect to
the random choices made by the algorithms, and the bounds
hold for any given input. The technique can be extended
to other convex programming problems. For example, an
algorithm for finding the smallest sphere enclosing a set
of `n` points in `E^d` has the same time bound.
},
Annote = {
The bound given for integer programming is not quite right,
as corrected by F. Eisenbrand,
here.
(The accompanying talk is the one given at FOCS in 1988; note that it gives the results
of the paper in terms of the Smallest Enclosing Sphere (or Minimum Enclosing Ball) problem.)
The delay between conference and journal publication is not the fault of the journal.
Developments between 1988 and 1995, roughly as discussed at the conclusion of the journal version of the paper:
Several developments have occurred since the conference version
of this paper appeared.
Adler and Shamir have shown that these ideas can be applied to
general convex programming.
Chazelle and Matou{\v s}ek have derandomized the recursive algorithm,
obtaining a deterministic algorithm requiring
`d^{O(d)}n` time.
Alon and Megiddo have applied and extended the ideas
of this paper to a parallel setting.
Seidel gave a different randomized algorithm, requiring
`O(d!n)` expected time, with a somewhat simpler analysis;
Matou{\v s}ek, Sharir and Welzl found a variant of Seidel's algorithm
requiring time subexponential in `d`. Their algorithm is
a randomized instance of the simplex algorithm.
Kalai was the first to find a subexponential simplex
algorithm. Problem instances have long been known
for which versions of the simplex algorithm require at least `2^d`
operations [KM]. These results cast new light on the complexity
of the simplex algorithm, and on the possibility that linear programming
problems can be solved in "strongly polynomial" time; such an algorithm
would need
`(nd)^{O(1)}` operations, with the number of operations
independent of the size of the numbers specifying a problem instance.
Some more recent related results:
Vapnik's leave-one-out error estimate for
support vector machines is a version of Lemma 3.2, generalized to quadratic
programming. (Such deleted error estimates were
found in the sixties.)
Chazelle et al.
observe that one of these
algorithms can be the basis for sublinear geometric
algorithms; another paper
observes that the approach works well from the standpoint
of multi-pass algorithms. Lemma 3.2 (or its generalization
to convex programming), was
rediscovered recently:
Calafiore and Campi, Theorem 1.
Their proof rediscovers "backwards analysis".
}}
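%% A minimal sketch of the iterative-reweighting idea behind the entry above (lp2),
%% for min c.x subject to Ax <= b: solve small weighted random samples of the
%% constraints, and double the weights of violated constraints when the violators are
%% "light". The sample size 6d^2, the lightness test, and the use of scipy's LP solver
%% for the base case are illustrative assumptions, not the paper's exact constants or
%% its recursive and mixed variants.
%%
%% import numpy as np
%% from scipy.optimize import linprog
%%
%% def clarkson_lp(c, A, b, seed=0):
%%     rng = np.random.default_rng(seed)
%%     n, d = A.shape
%%     r = min(n, 6 * d * d)                      # sample size on the order of d^2
%%     w = np.ones(n)
%%     while True:
%%         idx = rng.choice(n, size=r, replace=False, p=w / w.sum())
%%         res = linprog(c, A_ub=A[idx], b_ub=b[idx],
%%                       bounds=[(None, None)] * d, method="highs")
%%         if not res.success:
%%             return None                        # infeasible/unbounded sample: not handled here
%%         x = res.x
%%         viol = A.dot(x) - b > 1e-9             # constraints violated by the sample optimum
%%         if not viol.any():
%%             return x                           # feasible for all constraints, hence optimal
%%         if w[viol].sum() <= w.sum() / (3 * d):
%%             w[viol] *= 2                       # reweight only "light" violator sets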
@article{BCL,
Appearedin = {SODA '90: } # proc # {First } # soda,
Author = {Jon L. Bentley and Kenneth L. Clarkson and David B. Levine},
Ayear = {1990},
Journal = {Algorithmica},
Pages = {168--183},
Title = {Fast Linear Expected-Time Algorithms for Computing Maxima and Convex Hulls},
Year = {1993},
Annote = {Related to this paper.}}
@article{Clarkson90,
Appearedin = {FOCS '88: } # proc # {Twenty-Ninth } # focs,
Author = {Kenneth L. Clarkson and Herbert Edelsbrunner and Leonidas J. Guibas and Micha Sharir and Emo Welzl},
Ayear = {1988},
Journal = dcg,
Number = {2},
Pages = {99--160},
Title = {Combinatorial complexity bounds for arrangements of curves and surfaces},
Volume = {5},
Year = {1990}}
@inproceedings{tsp,
Author = {Kenneth L. Clarkson},
Booktitle = {SODA '91: } # proc # {Second } # soda,
Month = {January},
Pdf = {tsp/p.pdf},
Ps = {tsp/p.ps},
Title = {Approximation algorithms for planar traveling salesman tours and minimum-length triangulations},
Year = {1991},
Abstract = {
This paper gives a partitioning scheme for the geometric,
planar traveling salesman problem, under the Euclidean
metric: given a set `S` of `n` points in the plane, find
a shortest closed tour (path) visiting all the points.
The scheme employs randomization, and gives a tour
that can be expected to be short, if `S` satisfies
the condition that a random subset `R\subset S` has on
average a tour much shorter than an optimal tour of `S`.
This condition holds for points independently, identically
distributed in the plane, for example, for which a tour
within `1+\epsilon` of shortest can be found in expected
time `nk^2 2^k`, where `k=O(\log\log n)^3/\epsilon^2`.
One algorithm employed in the scheme is of interest
in its own right: when given a simple polygon `P`, it
finds a Steiner triangulation of the interior of `P`.
If `P` has `n` sides and perimeter `L_P`, the edges
of the triangulation have total length `L_P O(\log n)`.
If this algorithm is applied to a simple polygon induced
by a minimum spanning tree of a point set, the result is a
Steiner triangulation of the set with total length within
a factor of `O(\log n)` of the minimum possible.
},
Annote = {
A better partitioning scheme was given by Eppstein, using quadtrees,
and of course Arora's algorithm makes the whole approach moot.
There is a certain foreshadowing here of "pseudo-triangulations",
however.
}}
@article{tri,
Appearedin = {SoCG '91: } # proc # {Seventh } # socg,
Author = {Kenneth L. Clarkson and Richard Cole and Robert E. Tarjan},
Ayear = {1991},
Slides = {tri/t.pdf},
Slidesnote1 = {n},
Slidesnote2 = {n},
Dvi = {tri/p.dvi},
Journal = {International Journal of Computational Geometry and Applications},
Pages = {117--133},
Pdf = {tri/p.pdf},
Ps = {tri/p.ps},
Title = {Randomized parallel algorithms for trapezoidal diagrams},
Year = {1992},
Abstract = {
We describe randomized parallel algorithms for building
trapezoidal diagrams of line segments in the plane.
The algorithms are designed for a CRCW PRAM. For general
segments, we give an algorithm requiring optimal `O(A+n\log
n)` expected work and optimal `O(\log n)` time, where
`A` is the number of intersecting pairs of segments.
If the segments form a simple chain, we give an algorithm
requiring optimal `O(n)` expected work and `O(\log
n\log\log n\log^*n)` expected time, and a
simpler algorithm requiring `O(n\log^*n)` expected work.
The serial algorithm corresponding to the latter is among
the simplest known algorithms requiring `O(n\log^*n)`
expected operations. For a set of segments forming `K`
chains, we give an algorithm requiring `O(A+n\log^*n+K\log n)`
expected work and `O(\log n\log\log n\log^* n)`
expected time. The parallel time bounds require the
assumption that enough processors are available, with
processor allocations every `\log n` steps.
},
Annote = {
The serial version of our basic algorithm, as applied to non-intersecting segments,
is a bit simpler
than the divide-and-conquer scheme of the earlier paper,
and not very far from Seidel's algorithm,
independently discovered at the same time as this one. (His algorithm uses planar
point location for a key task, while we use a sweepline algorithm.)
}}
@article{cms,
Appearedin = {Proc. Symp. Theor. Aspects of Comp. Sci.},
Author = {Kenneth L. Clarkson and Kurt Mehlhorn and Raimund Seidel},
Ayear = {1992},
Dvi = {cms/p.dvi},
Slides = {cms/t.pdf},
Slidesnote1 = {n},
Slidesnote2 = {n},
Journal = {Comp. Geom.: Theory and Applications},
Pages = {185--212},
Pdf = {cms/p.pdf},
Ps = {cms/p.ps},
Title = {Four results on randomized incremental constructions},
Year = {1993},
Abstract = {
We prove four results on randomized incremental constructions (RICs):
- an analysis of the expected behavior under insertion and deletions,
- a fully dynamic data structure for convex hull maintenance in arbitrary dimensions,
- a tail estimate for the space complexity of RICs,
- a lower bound on the complexity of a game related to RICs.
},
Annote = {
Among other things, this paper extends Seidel's "backwards analysis" approach
(not far from the "leave one out" technique of learning theory) to a general
version of RIC; this involves a kind of "searching in history" and exploitation
of the exchangeability of members of a random sample.
The talk slides are missing a few figures.
}}
@incollection{rga,
Author = {Kenneth L. Clarkson},
Booktitle = {Computing in Euclidean Geometry},
Category = {Survey},
Editor = {F. K. Hwang and D.-Z. Du},
Pdf = {rga/p.pdf},
Ps = {rga/p.ps},
Publisher = {World Scientific Publishing},
Title = {Randomized Geometric Algorithms},
Year = {1992},
Abstract = {
This paper surveys some of the applications of randomization to computational
and combinatorial geometry. Randomization provides
a general way to divide-and-conquer geometric problems,
and gives a simple incremental approach to building geometric structures.
The paper discusses closest-point problems,
convex hulls, Voronoi diagrams, trapezoidal diagrams of line segments,
linear programming in small dimension, range queries,
and bounds for point-line incidences and for `(\le k)`-sets.
Relations to the
Vapnik-Chervonenkis dimension, PAC-learnability of geometric concepts,
and the Hough transform are briefly noted.
}}
@article{kmins,
Author = {Kenneth L. Clarkson},
Dvi = {kmins/p.dvi},
Journal = dcg,
Pages = {227--233},
Pdf = {kmins/p.pdf},
Ps = {kmins/p.ps},
Title = {A bound on local minima of arrangements that implies the upper bound theorem},
Volume = {10},
Year = {1993},
Abstract = {
This paper shows that the `i`-level of an arrangement of hyperplanes in
`E^d` has at most `\binom{i+d-1}{d-1}` local minima.
This bound follows from ideas previously used to prove bounds on `(\le k)`-sets.
Using linear programming duality,
the Upper Bound Theorem is obtained as a corollary,
giving yet another proof of this
celebrated bound on the number of vertices of a simple polytope
in `E^d` with `n` facets.
},
Annote = {This paper by Mulmuley seems closely
related, and probably should have been cited.
}}
@inproceedings{dets,
Address = {Pittsburgh, PA},
Author = {Kenneth L. Clarkson},
Booktitle = {FOCS '92: } # proc # {Thirty-Third } # focs,
Dvi = {dets/p.dvi},
Slides = {dets/t.pdf},
Slidesnote1 = {n},
Slidesnote2 = {n},
Month = {October},
Pages = {387--395},
Pdf = {dets/p.pdf},
Ps = {dets/p.ps},
Title = {Safe and Effective Determinant Evaluation},
Year = {1992},
Abstract = {
The problem of evaluating the sign of the determinant of a
small matrix arises in many geometric algorithms.
Given an `n\times n` matrix `A` with integer entries, whose columns are all
smaller than `M` in Euclidean norm,
the algorithm given here evaluates the sign of the determinant `\det A`
exactly. The algorithm requires an arithmetic precision of less
than `1.5n+2\lg M` bits.
The number of arithmetic operations needed
is `O(n^3)+O(n^2)\log\OD(A)/\beta`,
where `\OD(A)|\det A|` is the product of the lengths of the columns of `A`,
and `\beta` is the number of "extra" bits of precision,
$$\min\{\lg(1/\mathbf{u})-1.1n-2\lg n-2,\lg N - \lg M - 1.5n - 1\},$$
where `\mathbf{u}` is the roundoff error in approximate arithmetic, and `N`
is the largest representable integer.
Since `\OD(A)\leq M^n`, the algorithm requires `O(n^3\lg M)` time,
and `O(n^3)` time when `\beta=\Omega(\log M)`.
}}
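%% The entry above (dets) evaluates determinant signs in limited-precision floating
%% point. For comparison only, and not as the paper's method, here is a sketch that
%% gets the sign exactly by fraction-free (Bareiss) elimination over Python's
%% arbitrary-precision integers.
%%
%% def det_sign_exact(M):
%%     """Sign (+1, 0, -1) of det(M) for a square integer matrix."""
%%     A = [list(map(int, row)) for row in M]
%%     n = len(A)
%%     sign, prev = 1, 1
%%     for k in range(n - 1):
%%         if A[k][k] == 0:                       # pivot: swap in a row with a nonzero entry
%%             for i in range(k + 1, n):
%%                 if A[i][k] != 0:
%%                     A[k], A[i] = A[i], A[k]
%%                     sign = -sign
%%                     break
%%             else:
%%                 return 0                       # singular
%%         for i in range(k + 1, n):
%%             for j in range(k + 1, n):
%%                 A[i][j] = (A[i][j] * A[k][k] - A[i][k] * A[k][j]) // prev
%%         prev = A[k][k]
%%     d = sign * A[n - 1][n - 1]                 # Bareiss leaves det(A) in the last entry
%%     return (d > 0) - (d < 0)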
@unpublished{hulltalk,
Author = {Kenneth L. Clarkson},
Nobib = {y},
Note = {Presentation at Fifth MSI-Stony Brook Workshop on Computational Geometry},
Ps = {hulltalk.ps},
Title = {Convex Hulls: Some Algorithms and Applications},
Url = {hulltalk.html},
Year = {1996},
Abstract = {
This talk sketches: a convenient method for
computing the volumes of Voronoi regions; a proof that
"area-stealing" natural neighbor interpolation works;
a scheme for smoother natural neighbor interpolation
alternative to Sibson's method; the interpolation
scheme used in the "finite volume element" method;
and the observation that the minimax piecewise-linear
interpolant of a convex function is the (lower)
convex hull.
},
Bdsk-Url-1 = {hulltalk.html}}
@article{center,
Appearedin = {SoCG '93: } # proc # {Ninth } # socg,
Author = {Kenneth L. Clarkson and David Eppstein and Gary L. Miller and Carl Sturtivant and Shang-Hua Teng},
Ayear = {1993},
Demo = {center/demo.html},
Demoscrolling = {auto},
Dvi = {center/p.dvi},
Journal = {International Journal of Computational Geometry and Applications},
Number = 3,
Pages = {357--377},
Pdf = {center/p.pdf},
Ps = {center/p.ps},
Title = {Approximating center points with iterated {R}adon points},
Volume = 6,
Year = {1996},
Abstract = {
We give a practical and provably good Monte Carlo algorithm
for approximating center points. Let `P` be a set of `n`
points in `\reals^d`. A point `c \in \reals^d` is a `\beta`-center
point of `P` if every closed halfspace containing `c`
contains at least `\beta n` points of `P`. Every point
set has a `1/(d+1)`-center point; our algorithm finds
an `\Omega(1/d^2)`-center point with high probability.
Our algorithm has a small constant factor and is the first
approximate center point algorithm whose complexity is
subexponential in `d`. Moreover, it can be optimally
parallelized to require `O(\log^2d\log\log n)` time.
Our algorithm has been used in mesh partitioning methods
and can be used in constructing high breakdown estimators
for multivariate datasets in statistics. It has the
potential to improve results in practice for constructing
weak `\epsilon`-nets. We derive a variant of our algorithm
whose time bound is fully polynomial in `d` and linear in
`n`, and show how to combine our approach with previous
techniques to compute high quality center points more
quickly.
}}
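%% A minimal sketch of one reading of the construction in the entry above (center):
%% a Radon point of d+2 points computed from a null vector of the affine-dependence
%% system, iterated bottom-up over a (d+2)-ary tree of random samples. The tree depth,
%% the sampling details, and the use of SVD for the null vector are illustrative
%% assumptions rather than the paper's tuned parameters.
%%
%% import numpy as np
%%
%% def radon_point(pts):
%%     """pts: (d+2) x d array; a point in both hulls of a Radon partition."""
%%     pts = np.asarray(pts, dtype=float)
%%     m = pts.shape[0]
%%     A = np.vstack([pts.T, np.ones(m)])         # alphas satisfy A.dot(alpha) = 0
%%     alpha = np.linalg.svd(A)[2][-1]            # null vector (smallest singular value)
%%     pos = alpha > 0
%%     return alpha[pos].dot(pts[pos]) / alpha[pos].sum()
%%
%% def iterated_radon_center(P, depth=4, seed=0):
%%     """Monte Carlo approximation of a center point of P (an n x d array)."""
%%     rng = np.random.default_rng(seed)
%%     P = np.asarray(P, dtype=float)
%%     d = P.shape[1]
%%
%%     def build(level):
%%         if level == 0:
%%             return P[rng.integers(len(P))]     # random input point at the leaves
%%         return radon_point([build(level - 1) for _ in range(d + 2)])
%%
%%     return build(depth)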
@inproceedings{Clarkson93b,
Author = {Kenneth L. Clarkson},
Booktitle = {WADS '93: Proceedings of the Third Workshop on Algorithms and Data Structures},
Pages = {246--252},
Title = {Algorithms for polytope covering and approximation},
Url = {#pcover},
Year = {1993},
Bdsk-Url-1 = {#pcover}}
@inproceedings{Clarkson94,
Author = {Kenneth L. Clarkson},
Booktitle = {SoCG '94: } # proc # {Tenth } # socg,
Pages = {160--164},
Title = {An algorithm for approximate closest-point queries},
Url = {#pcover},
Year = {1994},
Bdsk-Url-1 = {#pcover}}
@unpublished{pcover,
Author = {Kenneth L. Clarkson},
Dvi = {pcover/p.dvi},
Pdf = {pcover/p.pdf},
Ps = {pcover/p.ps},
Note = {Merges two papers above},
Title = {Algorithms for Polytope Covering and Approximation, and for Approximate Closest-point Queries},
Year = {1994},
Abstract = {
This paper gives an algorithm for polytope covering:
let `L` and `U` be sets of points in `R^d`,
comprising `n` points altogether. A cover for
`L` from `U` is a set `C\subset U` with `L` a subset
of the convex hull of `C`. Suppose `c` is the size of
a smallest such cover, if it exists. The randomized
algorithm given here finds a cover of size no more than
`c(5d\ln c)`, for `c` large enough. The algorithm
requires `O(c^2n^{1+\delta})` expected time. (In
this paper, `\delta` will denote any fixed value
greater than zero.) More exactly, the time bound is
$$O(cn^{1+\delta}+c(nc)^{
1/(1+\gamma/(1+\delta))}),$$
where `\gamma\equiv 1/\lfloor d/2\rfloor`. The previous best bounds
were `c O(\log n)` cover size in `O(n^d)` time [MiS].
A variant algorithm is applied to the problem of
approximating the boundary of a polytope with the
boundary of a simpler polytope. For an appropriate
measure, an approximation with error `\epsilon`
requires `c=O(1/\epsilon)^{(d-1)/2}` vertices, and the
algorithm gives an approximation with `c({5d^3\ln(1/\epsilon)})`
vertices. The algorithms apply ideas previously used
for small-dimensional linear programming. The final
result here applies polytope approximation to the
post office problem: given `n` points (called
sites) in `d` dimensions, build a data structure so
that given a query point `q`, the closest site to `q`
can be found quickly. The algorithm given here is given
also a relative error bound `\epsilon`, and depends on
a ratio `\rho`, which is no more than the ratio of the
distance between the farthest pair of sites to the distance
between the closest pair of sites. The algorithm builds
a data structure of size `O(n(\log\rho)/\epsilon^{d/2})`
in time `O(n^2(\log\rho)/\epsilon^d)`. With this data
structure, closest-point queries can be answered in
`O(\log n)/\epsilon^{d/2}` time.
},
Annote = {
Brönnimann and Goodrich show that
the same iterative randomized algorithm
applies in the general setting of range spaces of bounded
VC-dimension, and that
a linear-sized `\epsilon`-net implies a
constant-factor approximation algorithm. Their results
are extended by this later paper.
}}
@inproceedings{os,
Author = {Kenneth L. Clarkson},
Booktitle = {FOCS '94: } # proc # {Thirty-Fifth } # focs,
Dvi = {os/p.dvi},
Pages = {695--702},
Slides = {os/t.pdf},
Slidesnote1 = {n},
Slidesnote2 = {n},
Pdf = {os/p.pdf},
Ps = {os/p.ps},
Title = {More output-sensitive geometric algorithms},
Year = 1994,
Abstract = {
A simple idea for speeding up the computation of extrema
of a partially ordered set turns out to have a number
of interesting applications in geometric algorithms; the
resulting algorithms generally replace an appearance of the
input size `n` in the running time by an output size `A\leq
n`. In particular, the `A` coordinate-wise minima of a set
of `n` points in `R^d` can be found by an algorithm needing
`O(nA)` time. Given `n` points uniformly distributed
in the unit square, the algorithm needs `n+O(n^{5/8})`
point comparisons on average. Given a set of `n` points
in `R^d`, another algorithm can find its `A` extreme
points in `O(nA)` time. Thinning for nearest-neighbor
classification can be done in time `O(n\log n)\sum_i A_i n_i`,
finding the `A_i` irredundant points among `n_i`
points for each class `i`, where `n=\sum_i n_i` is the
total number of input points. This sharpens a more
obvious `O(n^3)` algorithm, which is also given here.
Another algorithm is given that needs `O(n)` space to
compute the convex hull of `n` points in `O(nA)` time.
Finally, a new randomized algorithm finds the convex hull
of `n` points in `O(n\log A)` expected time, under the
condition that a random subset of the points of size `r`
has expected hull complexity `O(r)`. All but the last
of these algorithms have polynomial dependence on the
dimension `d`, except possibly for linear programming.
},
Annote = {
There is some overlap with the work of
Chan and
Ottmann et al.,
in particular, for finding extreme points.
}}
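%% A minimal sketch of the simple output-sensitive pattern described in the abstract
%% above (os): each point is tested against the minima found so far, so the work is
%% roughly n times the output size A. The move-to-front refinement and average-case
%% tuning discussed in the paper are not reproduced.
%%
%% def coordinate_wise_minima(points):
%%     """Points (as tuples) not dominated coordinate-wise by any other point."""
%%     minima = []
%%     for q in points:
%%         if any(all(pi <= qi for pi, qi in zip(p, q)) and p != q for p in minima):
%%             continue                           # an existing minimum dominates q
%%         minima = [p for p in minima            # drop minima that q dominates
%%                   if not (all(qi <= pi for qi, pi in zip(q, p)) and q != p)]
%%         minima.append(q)
%%     return minima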
@unpublished{moving_diam,
Author = {Kenneth L. Clarkson},
Dvi = {moving_diam/p.dvi},
Pdf = {moving_diam/p.pdf},
Ps = {moving_diam/p.ps},
Title = {Algorithms for the Minimum Diameter of Moving Points and for the Discrete 1-center Problem},
Year = {1997},
Abstract = {
Given points moving with constant, but possibly different,
velocities, the minimum moving diameter problem is to
find the minimum, over all time, of the maximum distance
between a pair of points at each moment. This note gives
a randomized algorithm requiring `O(n \log n )` expected
time for this problem, in two and three dimensions.
Also briefly noted is a randomized `O(n\log n\log\log n)`
expected-time algorithm for the discrete 1-center problem
in three dimensions; in this problem, a member `p` of a
set `S` of points is desired, whose maximum distance to `S`
is minimum over all points of `S`.
},
Annote = {
Slight foreshadowing of sublinear geometric algorithms.
}}
@unpublished{recon,
Author = {Kenneth L. Clarkson},
Dvi = {recon/p.dvi},
Pdf = {recon/p.pdf},
Ps = {recon/p.ps},
Title = {A General Randomized Incremental Reconstruction Procedure},
Year = {1997},
Abstract = {
The technique of randomized incremental construction
allows a variety of geometric structures to be built
quickly. This note shows that once such a structure
is built, it is possible to store the geometric
input data for it so that the structure can be built
again by a randomized algorithm even more quickly.
Except for the randomization, this generalizes the
technique of Snoeyink and van Kreveld that applies to
planar problems.
}}
@unpublished{CIS677,
Author = {Kenneth L. Clarkson},
Note = {Lecture notes},
Title = {A graduate course in computational geometry},
Url = {cis677},
Year = {1997},
Bdsk-Url-1 = {cis677}}
@article{nnms,
Appearedin = {STOC '97: } # proc # {Twenty-Ninth } # stoc,
Author = {Kenneth L. Clarkson},
Ayear = {1997},
Journal = dcg,
Pages = {63--93},
Pdf = {nnms/p.pdf},
Ps = {nnms/p.ps},
Title = {Nearest Neighbor Queries in Metric Spaces},
Volume = 22,
Year = 1999,
Abstract = {
Given a set `S` of `n` sites (points), and a distance measure `d`,
the nearest neighbor searching problem
is to build a data structure so that given a query point `q`,
the site nearest to `q` can be found quickly.
This paper gives data structures for this problem when the sites and queries
are in a metric space.
One data structure, `D(S)`, uses a divide-and-conquer recursion.
The other data structure, `M(S,Q)`, is somewhat like a skiplist.
Both are simple and implementable.
The data structures are analyzed when the metric space obeys a
certain sphere-packing bound, and when the sites and query points
are random and have distributions with an exchangeability property.
This property implies, for example, that
query point `q` is a random element of `S\cup\{q\}`.
Under these conditions,
the preprocessing and space bounds for the algorithms are close
to linear in `n`. They depend also on the sphere-packing
bound, and on the logarithm of the distance ratio `\Upsilon(S)` of `S`,
the ratio of the distance between the farthest pair of
points in `S` to the distance between the closest pair.
The data structure `M(S,Q)`
requires as input data an additional
set `Q`, taken to be representative of the query points.
The resource bounds of `M(S,Q)` have a dependence on
the distance ratio of `S\cup Q`.
While `M(S,Q)` can return wrong
answers, its failure probability can be bounded, and
is decreasing in a parameter `K`. Here `K\leq |Q|/n` is chosen
when building `M(S,Q)`.
The expected query time for `M(S,Q)` is `O(K\log n)\log\Upsilon(S\cup Q)`,
and the resource bounds increase linearly in `K`.
The data structure `D(S)` has expected `O(\log n)^{O(1)}` query
time, for fixed distance ratio.
The preprocessing algorithm for `M(S,Q)` can be used to solve
the all-nearest-neighbor problem for `S` in
`O(n(\log n)^2(\log\Upsilon(S))^2)` expected time.
},
Annote = {
The assumption of a sphere-packing bound is equivalent
to a bounded doubling constant, as discussed by
Krauthgamer and Lee,
which is more general than the growth-restricted
or doubling measure
assumption of Karger and Ruhl.
See also Har-Peled and Mendel,
and a survey.
}}
@article{CSW,
Author = {Kenneth L. Clarkson and Wim Sweldens and Alice Zheng},
Journal = {IEEE Trans. Commun.},
Number = 2,
Pages = {253--261},
Title = {Fast Multiple Antenna Differential Decoding},
Url = {http://cm.bell-labs.com/who/wim/papers/fast/},
Volume = 49,
Year = {2001},
Abstract = {
We present an algorithm based on lattice reduction for the
fast decoding of diagonal differential modulation across
multiple antenna. While the complexity of the maximum
likelihood algorithm is exponential both in the number of
antenna and the rate, the complexity of our approximate
lattice algorithm is polynomial in the number of antennas
and the rate. We show that the error performance of our
lattice algorithm is very close to the maximum likelihood
algorithm.
},
Bdsk-Url-1 = {http://cm.bell-labs.com/who/wim/papers/fast/}}
@unpublished{coresets1,
Author = {Mihai Badoiu and Kenneth L. Clarkson},
Demo = {core_sets/t/demo.svg},
Note = {Manuscript},
Pdf = {coresets1.pdf},
Slides = {core_sets/t/t.xml},
Slidesnote2 = {y},
Title = {Optimal Core-Sets for Balls},
Versions = { - Revised May 2006: Removed dependence on size of smallest ball, considered a variant, etc.
},
Year = {2002},
Abstract = {
Given a set of points `P\subset \reals^d` and value `\epsilon>0`,
an `\epsilon`-core-set `S \subset P` has the property that the smallest ball
containing `S` is within `\epsilon` of the smallest ball containing
`P`.
This paper shows that any point set has an `\epsilon`-core-set
of size `\lceil 1/\epsilon\rceil `, and this bound is tight
in the worst case. Some experimental results are also
given, comparing this algorithm with a previous one,
and with a more powerful, but slower one.
}}
@inproceedings{coresets2,
Author = {Mihai Badoiu and Kenneth L. Clarkson},
Booktitle = {SODA '03: } # proc # {Fourteenth } # soda,
Pdf = {coresets2.pdf},
Title = {Smaller Core-Sets for Balls},
Year = {2003},
Abstract = {
Given a set of points `P\subset R^d` and value `\epsilon>0`,
an `\epsilon`-core-set `S \subset P` has the property that the smallest ball
containing `S` is within `\epsilon` of the smallest ball containing
`P`.
This paper shows that any point set has an `\epsilon`-core-set
of size `\lceil 1/\epsilon\rceil `, and this bound is tight
in the worst case.
A faster algorithm given here finds an `\epsilon`-core-set of size
at most `2/\epsilon`.
These results imply the existence of small core-sets for solving approximate
`k`-center
clustering and related problems. The sizes of these core-sets
are considerably smaller than the previously known bounds,
and imply
faster algorithms; one such algorithm needs
`O(d n/\epsilon+(1/\epsilon)^{5})`
time to compute an `\epsilon`-approximate
minimum enclosing ball (1-center) of `n` points in `d`
dimensions.
A simple gradient-descent algorithm is also given, for computing
the minimum enclosing ball in `O(d n / \epsilon^{2})` time.
This algorithm also implies slightly faster algorithms for
computing approximately the smallest radius `k`-flat fitting
a set of points.
},
Annote = {The ideas and algorithm have seen application in
machine learning, including support vector regression,
and computational biology (mentioned in the "supplementary notes" of the latter).}}
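%% The abstract above (coresets2) mentions a simple gradient-descent algorithm for the
%% approximate minimum enclosing ball. A minimal sketch of that style of iteration
%% (repeatedly stepping toward the current farthest point with step sizes 1/(i+1))
%% follows; the starting point and the exact iteration count are illustrative choices.
%%
%% import numpy as np
%%
%% def approx_min_enclosing_ball(P, eps=0.1):
%%     """Approximate center and radius of the smallest ball enclosing P (n x d)."""
%%     P = np.asarray(P, dtype=float)
%%     c = P[0].copy()                            # start at an arbitrary input point
%%     for i in range(1, int(np.ceil(1.0 / eps ** 2)) + 1):
%%         far = P[np.argmax(np.linalg.norm(P - c, axis=1))]
%%         c += (far - c) / (i + 1)               # shrinking step toward the farthest point
%%     return c, np.linalg.norm(P - c, axis=1).max()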
@unpublished{CH2,
Author = {Kenneth L. Clarkson and John D. Hobby},
Pdf = {covprob/p.pdf},
Ps = {covprob/p.ps},
Title = {A Model of Coverage Probability under Shadow Fading},
Year = {2003},
Abstract = {
We give a simple analytic model of coverage probability
for CDMA cellular phone systems under lognormally
distributed shadow fading. Prior analyses have generally
considered the coverage probability of a single antenna;
here we consider the probability of coverage by an
ensemble of antennas, using some independence assumptions,
but also modeling a limited form of dependency among the
antenna fades. We use the Fenton-Wilkinson approach
of approximating the external interference `I_0` as
lognormally distributed. We show that our model gives
a coverage probability that is generally within a few
percent of Monte Carlo estimates, over a wide regime of
antenna strengths and other relevant parameters.
}}
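%% The abstract above (CH2) refers to the Fenton-Wilkinson approach of approximating a
%% sum of lognormal variables by a single lognormal. A minimal sketch of the standard
%% moment-matching step for independent summands follows; the paper's modeling of
%% dependent antenna fades is not reproduced.
%%
%% import math
%%
%% def fenton_wilkinson(mus, sigmas):
%%     """Parameters (mu, sigma) of a lognormal matching the mean and variance
%%     of a sum of independent lognormal terms exp(N(mu_i, sigma_i^2))."""
%%     mean = sum(math.exp(m + s * s / 2) for m, s in zip(mus, sigmas))
%%     var = sum((math.exp(s * s) - 1) * math.exp(2 * m + s * s)
%%               for m, s in zip(mus, sigmas))
%%     sigma2 = math.log(1 + var / mean ** 2)
%%     return math.log(mean) - sigma2 / 2, math.sqrt(sigma2)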
@inproceedings{HCHP,
Author = {K. Georg Hampel and Kenneth L. Clarkson and John D. Hobby and Paul A. Polakos},
Booktitle = {VTC-2003-Fall: IEEE Vehicular Technology Conference},
Pages = {927--932},
Pdf = {vtc2003.pdf},
Title = {The Tradeoff between Coverage and Capacity in Dynamic Optimization of 3{G} Cellular Networks},
Year = {2003},
Abstract = {
For 3G cellular networks, capacity is an important
objective, along with coverage, when characterizing the
performance of high-data-rate services. In live networks,
the effective network capacity heavily depends on the
degree that the traffic load is balanced over all cells,
so changing traffic patterns demand dynamic network
reconfiguration to maintain good performance. Using a
four-cell sample network, and antenna tilt, cell power
level and pilot fraction as adjustment variables, we
study the competitive character of network coverage and
capacity in such a network optimization process, and
how it compares to the CDMA-intrinsic coverage-capacity
tradeoff driven by interference. We find that each set of
variables provides its distinct coverage-capacity tradeoff
behavior with widely varying and application-dependent
performance gains. The study shows that the impact of
dynamic load balancing highly depends on the choice of the
tuning variable as well as the particular tradeoff range
of operation.
}}
@article{BCGVW,
Author = {Simon C. Borst and Kenneth Clarkson and John Graybeal and Harish Viswanathan and Phillip Whiting},
Journal = {Bell Labs Technical Journal},
Number = 2,
Pages = {33--47},
Pdf = {BCGVW.pdf},
Title = {User-Level {QoS} and Traffic Engineering for {3G} Wireless {1xEV-DO} Systems},
Volume = 8,
Year = {2003},
Abstract = {
3G wireless systems such as 3G-1X, 1xEV-DO and 1xEV-DV
provide support for a variety of high-speed data
applications. The success of these services critically
relies on the capability to ensure an adequate QoS
experience to users at an affordable price. With
wireless bandwidth at a premium, traffic engineering and
network planning play a vital role in addressing these
challenges. We present models and techniques that we have
developed for quantifying the QoS perception of 1xEV-DO
users generating FTP or Web browsing sessions. We show
how user-level QoS measures may be evaluated by means of
a Processor-Sharing model which explicitly accounts for
the throughput gains from multi-user scheduling. The model
provides simple analytical formulas for key performance
metrics such as response times, blocking probabilities and
throughput. Analytical models are especially useful for
network deployment and in-service tuning purposes due to
the intrinsic difficulties associated with simulation-based
optimization approaches. We discuss the application of
our results in the context of Ocelot, which is a Lucent
tool for wireless network planning and optimization.
}}
@unpublished{MV,
Author = {Kenneth L. Clarkson},
Pdf = {mv.pdf},
Title = {Solution of Linear Systems Using Randomized Rounding},
Year = {2003},
Abstract = {
This paper gives an algorithm for solving linear systems, using a
randomized version of incomplete `LU` factorization together with
iterative improvement. The factorization uses Gaussian elimination
with partial pivoting, and preserves sparsity during factorization by
randomized rounding of the entries. The resulting approximate factorization
is then applied to estimate the solution. This simple technique,
combined with
iterative improvement, is demonstrated to be effective for a range of
linear systems. When applied to medium-sized sample matrices for PDEs,
the algorithm is qualitatively like multigrid: the work per iteration
is typically linear in the order of the matrix, and the number of
iterations to achieve a small residual is typically on the order of
fifteen to twenty. The technique is also tested for a sample of asymmetric
matrices from the Matrix Market, and is found to have
similar behavior for many of them.
}}
@unpublished{SB,
Author = {Kenneth L. Clarkson},
Demo = {sb/t/demo.svg},
Note = {Preliminary version presented at ALENEX99},
Slides = {sb/t/vv.xml},
Slidesnote2 = {y},
Title = {Nearest Neighbor Searching in Metric Spaces: Experimental Results for `sb(S)`},
Url = {Msb/readme.html},
Year = 2003,
Abstract = {
Given a set `S` of `n` sites (points), and a distance
measure `d`, the nearest neighbor searching problem is to build
a data structure so that given a query point `q`, the site nearest
to `q` can be found quickly. This paper gives a data structure for
this problem; the data structure is built using the distance function
as a "black box". The structure is able to speed up nearest
neighbor searching in a variety of settings, for example: points
in low-dimensional or
structured Euclidean space, strings under Hamming and edit distance,
and bit vector data from an OCR application. The data structures are
observed to need linear space, with a modest constant factor. The
preprocessing time needed per site is observed to match the
query time. The data structure can
be viewed as an application of a "kd-tree" approach in the metric
space setting, using Voronoi regions of a subset in place of
axis-aligned boxes.
},
Bdsk-Url-1 = {Msb/readme.html}}
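%% A minimal generic metric-tree sketch in the spirit of the entry above (SB): random
%% pivots play the role of kd-tree splits, each point goes to its nearest pivot, and
%% queries prune children by the triangle inequality using stored covering radii.
%% This is a baseline illustration under those assumptions, not the paper's sb(S)
%% structure; the branching factor and leaf size are arbitrary.
%%
%% import random
%%
%% def build(points, dist, m=4, leaf=16):
%%     if len(points) <= leaf:
%%         return ("leaf", points)
%%     pivots = random.sample(points, m)
%%     groups = [[] for _ in pivots]
%%     for p in points:                           # assign each point to its nearest pivot
%%         groups[min(range(m), key=lambda i: dist(p, pivots[i]))].append(p)
%%     if max(len(g) for g in groups) == len(points):
%%         return ("leaf", points)                # degenerate split; stop recursing
%%     return ("node", [(c, max(dist(p, c) for p in g), build(g, dist, m, leaf))
%%                      for c, g in zip(pivots, groups) if g])
%%
%% def nearest(tree, q, dist, best=(float("inf"), None)):
%%     if tree[0] == "leaf":
%%         for p in tree[1]:
%%             best = min(best, (dist(q, p), p))
%%         return best
%%     for c, radius, sub in sorted(tree[1], key=lambda ch: dist(q, ch[0])):
%%         if dist(q, c) - radius < best[0]:      # triangle-inequality pruning
%%             best = nearest(sub, q, dist, best)
%%     return best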
@inproceedings{dynamics,
Author = {Kenneth L. Clarkson and John D. Hobby},
Booktitle = {VTC-2004-Spring: IEEE Vehicular Technology Conference},
Demo = {dynamics/demo.svg},
Pages = {1534--1538},
Pdf = {dynamics/p.pdf},
Ps = {dynamics/p.ps},
Slides = {dynamics/t.xml},
Slidesnote2 = {y},
Title = {A Model of Soft Handoff under Dynamic Shadow Fading},
Volume = 3,
Year = {2004},
Abstract = {
We introduce a simple model of the effect of temporal
variation in signal strength on active-set membership,
for cellular phone systems that use the soft-handoff
algorithm of IS-95a. This model is based on a
steady-state calculation, and its applicability is
confirmed by Monte Carlo studies.
}}
@unpublished{cwi_ksets,
Author = {Kenneth L. Clarkson},
Dvi = {cwi_ksets/p.dvi},
Pdf = {cwi_ksets/p.pdf},
Ps = {cwi_ksets/p.ps},
Title = {On the Expected Number of `k`-sets of Coordinate-Wise Independent Points},
Year = {2004},
Abstract = {
Let `S` be a set of `n` points in `d` dimensions.
A `k`-set of `S` is a subset of size `k` that is
the intersection of `S` with some open halfspace.
This note shows that if the points of `S` are random,
with a coordinate-wise independent distribution,
then the expected number of `k`-sets of `S` is
`O((k\log(en/k))^{d-1})2^d/(d-1)!`, as `k\log n\rightarrow\infty`,
with a constant independent of the dimension.
}}
@inproceedings{l1,
Author = {Kenneth L. Clarkson},
Booktitle = {SODA '05: } # proc # {Sixteenth } # soda,
Dvi = {l1/p.dvi},
Pdf = {l1/p.pdf},
Ps = {l1/p.ps},
Slides = {l1/t/t.xml},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Subgradient and Sampling Algorithms for `L_1` Regression},
Year = {2005},
Abstract = {
Given an `n\times d` matrix `A`
and an `n`-vector `b`, the `L_1`
regression problem is to find the
vector `x` minimizing the objective
function `||Ax-b||_1`, where
`||y||_1 \equiv \sum_i |y_i|` for vector
`y`. This paper gives an algorithm needing
`O(n\log n)d^{O(1)}` time in the worst case
to obtain an approximate solution, with
objective function value within a fixed
ratio of optimum. Given `\epsilon>0`, a
solution whose value is within `1+\epsilon`
of optimum can be obtained either
by a deterministic algorithm using an
additional `O(n)(d/\epsilon)^{O(1)}` time,
or by a Monte Carlo algorithm using an
additional `O((d/\epsilon)^{O(1)})` time.
The analysis of the randomized algorithm
shows that weighted coresets exist for
`L_1` regression. The algorithms use the
ellipsoid method, gradient descent, and
random sampling.
}}
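%% The abstract above (l1) defines the objective ||Ax-b||_1. As a small grounding
%% example only (not the paper's subgradient-and-sampling algorithm), the problem can
%% be written as a linear program with auxiliary variables t >= |Ax - b| and handed to
%% an off-the-shelf solver; scipy's linprog is an assumed stand-in here.
%%
%% import numpy as np
%% from scipy.optimize import linprog
%%
%% def l1_regression(A, b):
%%     """Solve min_x ||Ax - b||_1 via: min sum(t) s.t. Ax - b <= t, b - Ax <= t."""
%%     A = np.asarray(A, dtype=float)
%%     b = np.asarray(b, dtype=float)
%%     n, d = A.shape
%%     c = np.r_[np.zeros(d), np.ones(n)]         # variables are (x, t); minimize sum(t)
%%     A_ub = np.r_[np.c_[A, -np.eye(n)], np.c_[-A, -np.eye(n)]]
%%     b_ub = np.r_[b, -b]
%%     bounds = [(None, None)] * d + [(0, None)] * n
%%     res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, method="highs")
%%     return res.x[:d]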
@article{set_cover,
Appearedin = {SoCG '05: } # proc # {Twenty-First } # socg,
Author = {Kenneth L. Clarkson and Kasturi Varadarajan},
Ayear = {2005},
Journal = dcg,
Month = {January},
Number = {1},
Pages = {43--58},
Pdf = {set_cover/p.pdf},
Ps = {set_cover/p.ps},
Slides = {set_cover/t/t.xml},
Slidesnote1 = {y},
Title = {Improved Approximation Algorithms for Geometric Set Cover},
Volume = {37},
Year = {2007},
Abstract = {
Given a collection `S` of subsets of some set `U`, and
`M \subset U`, the set cover problem is to find
the smallest subcollection `C\subset S` such that `M` is
a subset of the union of the sets in `C`. While the general
problem is NP-hard to solve, even approximately, here we
consider some geometric special cases, where usually
`U = \reals^d`. Combining previously known techniques [BG,CF], we show
that polynomial time approximation algorithms with provable performance exist,
under a certain general condition: that for a random subset
`R\subset S` and function `f()`, there is a decomposition
of the complement `U\setminus\cup_{Y\in R} Y` into an
expected `f(|R|)` regions, each region of a particular
simple form. Under this condition, a cover
of size `O(f(|C|))` can be found in polynomial time. Using this result,
and combinatorial geometry results implying
bounding functions `f(c)` that are nearly linear,
we obtain `o(\log c)` approximation algorithms
for covering by fat triangles, by pseudodisks,
by a family of fat objects,
and others. Similarly, constant-factor
approximations follow for similar-sized fat triangles
and fat objects, and for fat wedges.
With more work, we obtain constant-factor
approximation algorithms for covering by unit cubes in
`\reals^3`, and for guarding an `x`-monotone polygonal chain.
},
Annote = {
While no great claim of novelty was made for Theorem 2.2,
it should be pointed out that the proof is very close to that by
de Berg and Cheong (né Schwarzkopf).
(The related proof appears only in the journal version of their paper.
Thanks to Sariel Har-Peled for pointing this out.)
Moreover, recent work (circa 2008) tightens some of the
nonlinear cases;
see also here.
}}
@incollection{nn_survey,
Author = {Kenneth L. Clarkson},
Booktitle = {Nearest-Neighbor Methods for Learning and Vision: Theory and Practice},
Category = {Survey},
Dvi = {nn_survey/p.dvi},
Editor = {Gregory Shakhnarovich and Trevor Darrell and Piotr Indyk},
Isbn = {0-262-19547-X},
Pages = {15--59},
Pdf = {nn_survey/p.pdf},
Ps = {nn_survey/p.ps},
Publisher = {MIT Press},
Slides = {nn_survey/t/t.xml},
Slidesnote1 = {y},
Title = {Nearest-Neighbor Searching and Metric Space Dimensions},
Year = {2006},
Abstract = {
Given a set `S` of points in a metric
space with distance function `D`, the
nearest-neighbor searching problem is to build a
data structure for `S` so that for an input query point `q`,
the point `s\in S` that minimizes `D(s,q)`
can be found quickly.
We survey approaches to this problem, and its
relation to concepts of metric space dimension.
Several measures of dimension can be estimated
using nearest-neighbor searching, while others
can be used to estimate the cost of that searching.
In recent years, several data structures have been
proposed that are provably good for low-dimensional
spaces, for some particular measures of dimension.
These and other data structures for nearest-neighbor
searching are surveyed.
},
Annote = {
Some dimensions discussed: box, packing, Hausdorff, Assouad,
pointwise, information, correlation, quantization, energy,
Renyi.
}}
@inproceedings{enet_tris,
Author = {Kenneth L. Clarkson},
Booktitle = {STOC '06: } # proc # {Thirty-Eighth } # stoc,
Demo = {graded/demo.xml},
Dvi = {enet_tris/p.dvi},
Pdf = {enet_tris/p.pdf},
Ps = {enet_tris/p.ps},
Slides = {enet_tris/t/t.xml},
Slidesnote1 = {y},
Title = {Building Triangulations Using `\epsilon`-Nets},
Versions = { - Revised from 20/2/06: patches to upper bound proof, lower bound proof, many typos etc.
- Revised from version of 11/19/05: better upper bound proof, typos in Dudley example, cites peyre/cohen.
},
Year = {2006},
Abstract = {
This work addresses the problem of approximating
a manifold by a simplicial mesh, and the related
problem of building triangulations for the purpose of
piecewise-linear approximation of functions. It has
long been understood that the vertices of such meshes
or triangulations should be "well-distributed," or
satisfy certain "sampling conditions." This work
clarifies and extends some algorithms for finding such
well-distributed vertices, by showing that they can
be regarded as finding `\epsilon`-nets or
Delone sets in appropriate metric spaces. In some cases
where such Delone properties were already understood,
such as for meshes to approximate smooth manifolds that
bound convex bodies, the upper and lower bound results
are extended to more general manifolds; in particular,
under some natural conditions, the minimum Hausdorff
distance for a mesh with `n` simplices to a `d`-manifold
`M` is
$$\Theta((\int_M\sqrt{|\kappa(x)|}/n)^{2/d})$$
as `n\rightarrow\infty`, where `\kappa(x)` is the
Gaussian curvature at point `x\in M`. We also relate
these constructions to Dudley's approximation scheme for
convex bodies, which can be interpreted as involving an
`\epsilon`-net in a metric space whose distance function
depends on surface normals. Finally, a novel scheme is
given, based on the Steinhaus transform, for scaling
a metric space by a Lipschitz function to obtain a
new metric. This scheme is applied to show that some
algorithms for building finite element meshes and for
surface reconstruction can also be interpreted in the
framework of metric space `\epsilon`-nets.
}}
@inproceedings{rev_interference,
Author = {Kenneth L. Clarkson and K. Georg Hampel and John D. Hobby},
Booktitle = {VTC-2007-Fall: IEEE Vehicular Technology Conference},
Dvi = {rev_interference/p.dvi},
Note = {to appear, IEEE Transactions on Wireless Communications},
Pdf = {rev_interference/p.pdf},
Ps = {rev_interference/p.ps},
Title = {Modeling UpLink Power Control with Outage Probabilities},
Year = {2007},
Abstract = {
We investigate models for uplink interference in wireless systems.
Our models account for the effects of outage probabilities.
Such an accounting requires a nonlinear, even nonconvex model,
since increasing interference
at the receiving base station increases both mobile transmit power
and outage probability, and this results in a complex interaction.
Our system model always has at least one solution, a fixed point,
and it is provably unique under certain reasonable conditions. Our main
purpose is to model real wireless systems as accurately as possible,
and so we test our models on realistic scenarios using data from a
sophisticated simulator. Our algorithm for finding a fixed point works
very well on such scenarios, and is guaranteed to find the fixed point
when we can prove it is unique. A slightly simplified model reduces the
main data structure for a $K$-sector market to $16K^2$ bytes of memory.
}}
@inproceedings{loss_model,
Author = {Kenneth L. Clarkson and John D. Hobby},
Booktitle = {VTC-2007-Fall: IEEE Vehicular Technology Conference},
Dvi = {loss_model/p.dvi},
Note = {Winner, conference Best Paper award.},
Pdf = {loss_model/p.pdf},
Ps = {loss_model/p.ps},
Title = {Ocelot's Knapsack Calculations for Modeling Power Amplifier and {W}alsh Code Limits},
Year = {2007},
Abstract = {
We give a model for the performance impact on wireless systems of
the limitations of certain resources, namely, the base-station power
amplifier and the available OVSF codes. These limitations are
readily modeled in the loss model formulation as a
stochastic knapsack. A simple and well-known recurrence of
Kaufman and Roberts allows the predictions of the model to be
efficiently calculated. We discuss the assumptions and
approximations we have made that allow the use of the model. We have
included the model in Ocelot, a Lucent tool for modeling and
optimizing cellular phone systems. The model is fast to compute,
differentiable with respect to the relevant parameters, and able to
model broad ranges of capacity and resource use. These conditions
are critical to our application of optimization.
}}
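The Kaufman-Roberts recurrence mentioned above is simple enough to state in a few lines. This added sketch computes per-class blocking probabilities for a stochastic knapsack of capacity C; the example loads and sizes are illustrative, not Ocelot's.

def kaufman_roberts(C, loads, sizes):
    # Kaufman-Roberts recurrence: j*q[j] = sum_k loads[k]*sizes[k]*q[j - sizes[k]].
    # loads[k] is the offered load of class k and sizes[k] its resource demand.
    # Returns the per-class blocking probabilities.
    q = [0.0] * (C + 1)
    q[0] = 1.0
    for j in range(1, C + 1):
        q[j] = sum(loads[k] * sizes[k] * q[j - sizes[k]]
                   for k in range(len(sizes)) if j >= sizes[k]) / j
    total = sum(q)
    return [sum(q[j] for j in range(C - sizes[k] + 1, C + 1)) / total
            for k in range(len(sizes))]

# e.g. kaufman_roberts(20, loads=[5.0, 2.0], sizes=[1, 4]) for two traffic classes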
@article{sga,
Appearedin = {SODA '08: } # proc # {Nineteenth } # soda,
Author = {Kenneth L. Clarkson},
Ayear = {2008},
Demo = {sga/t/demo/demo.svg},
Dvi = {sga/p.dvi},
Journal = talg,
Number = 4,
Pdf = {sga/p.pdf},
Ps = {sga/p.ps},
Slides = {sga/t/t.xml},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Coresets, Sparse Greedy Approximation, and the {F}rank-{W}olfe Algorithm},
Versions = { - July 22, 2008: Various corrections (actually done October 2007)
- June 19, 2007: Corrected probabilistic existence proof (per K. Varadarajan), typos
- April 29, 2007: Clearer statement of results, cleaner proof for algorithms with away steps, typos
- March 26, 2007: First version
},
Volume = 6,
Year = {2010},
Abstract = {
The problem of maximizing a concave function $f(x)$ in a simplex $S$ can be
solved approximately by a simple greedy algorithm that, for given $k$, can
find a point $x_{(k)}$ on a $k$-dimensional face such that $f(x_{(k)}) \ge
f(x_*) - O(1/k)$, where $f(x_*)$ is the maximum value of $f$ in $S$. This
algorithm and analysis were known before, and are related to problems of
statistics and machine learning, such as boosting, regression, and density
mixture estimation. In other work, coming from computational geometry, the
existence of $\epsilon$-coresets was shown for the minimum
enclosing ball problem, by means of a simple greedy algorithm. Similar
greedy algorithms, which are special cases of the Frank-Wolfe algorithm, were
described for other enclosure problems. Here these results are tied
together, stronger convergence results are reviewed, and several coreset
bounds are generalized or strengthened.
}}
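An added sketch of the greedy (Frank-Wolfe) step on the simplex discussed in the abstract; the 2/(k+2) step schedule is the standard one giving the O(1/k) gap, and the example objective in the closing comment is mine, not from the paper.

import numpy as np

def frank_wolfe_simplex(grad_f, d, iters=100):
    # Maximize a concave f over the probability simplex with the greedy
    # (Frank-Wolfe) step: move toward the vertex with the largest gradient
    # coordinate.  After k steps the iterate mixes at most k+1 vertices,
    # which is the coreset/sparsity view of the abstract.  Illustrative only.
    x = np.zeros(d)
    x[0] = 1.0                              # start at a vertex
    for k in range(1, iters + 1):
        i = int(np.argmax(grad_f(x)))       # linear maximization over the simplex
        step = 2.0 / (k + 2)                # standard schedule, O(1/k) gap
        x *= 1 - step
        x[i] += step
    return x

# e.g. maximize f(x) = -||x - c||^2 by passing grad_f = lambda x: -2 * (x - c)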
@inproceedings{jl_manifold,
Author = {Kenneth L. Clarkson},
Booktitle = {SoCG '08: } # proc # {Twenty-Fourth } # socg,
Dvi = {jl_manifold/p.dvi},
Pdf = {jl_manifold/p.pdf},
Ps = {jl_manifold/p.ps},
Slides = {jl_manifold/t2/t.xml},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Tighter Bounds for Random Projections of Manifolds},
Versions = { - July 22, 2008: Various corrections (actually done March 2008)
- December 2007: First version
},
Year = {2008},
Abstract = {
The Johnson-Lindenstrauss random projection lemma gives a simple way to
reduce the dimensionality of a set of points while approximately preserving
their pairwise distances. The most direct application of the lemma applies
to a finite set of points, but recent work has extended the technique to
affine subspaces, curves, and general smooth manifolds. Here the case of
random projection of smooth manifolds is considered, and a previous analysis
is sharpened, reducing the dependence on such properties as the manifold's
maximum curvature.}}
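For reference, an added sketch of the Johnson-Lindenstrauss projection for a finite point set, the simplest case mentioned in the abstract (not the manifold setting); the target dimension k and the seed are free, illustrative parameters.

import numpy as np

def jl_project(points, k, seed=0):
    # Gaussian random projection of n points in R^d down to R^k, scaled by
    # 1/sqrt(k); pairwise distances are preserved to within 1 +/- eps when
    # k is on the order of eps^-2 log n.  Finite-point-set case only.
    rng = np.random.default_rng(seed)
    d = points.shape[1]
    G = rng.standard_normal((d, k)) / np.sqrt(k)
    return points.dot(G)

# sanity check: compare pairwise distances of X and jl_project(X, k) for random X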
@inproceedings{self_improve,
Author = {Kenneth L. Clarkson and C. Seshadhri},
Booktitle = {SoCG '08: } # proc # {Twenty-Fourth } # socg,
Dvi = {self_improve/p.dvi},
Pdf = {self_improve/p.pdf},
Ps = {self_improve/p.ps},
Slidesnote1 = {n},
Slidesnote2 = {n},
Title = {Self-Improving Algorithms for {D}elaunay Triangulations},
Year = {2008},
Abstract = {
We study the problem of two-dimensional Delaunay triangulation in the
self-improving algorithms model. We assume that the `n` points of the input
each come from an independent, unknown, and arbitrary distribution. The
first phase of our algorithm builds data structures that store relevant
information about the input distribution. The second phase uses these data
structures to efficiently compute the Delaunay triangulation of the input.
The running time of our algorithm matches the information-theoretic lower
bound for the given input distribution, implying that if the input
distribution has low entropy, then our algorithm beats the standard
`\Omega(n \log n)` bound for computing Delaunay triangulations.
Our algorithm and analysis use a variety of techniques: `\epsilon`-nets for
disks, entropy-optimal point-location data structures, linear-time splitting
of Delaunay triangulations, and information-theoretic arguments. }}
@article{self_improve2,
Appearedas = {Self-Improving Algorithms for {D}elaunay Triangulations},
Appearedin = {SoCG '08: } # proc # {Twenty-Fourth } # socg,
Author = {Nir Ailon and Bernard Chazelle and Kenneth L. Clarkson and Ding Liu and Wolfgang Mulzer and C. Seshadhri},
Ayear = 2008,
Slides = {self_improve/t/t.xml},
Slidesnote1 = {n},
Slidesnote2 = {n},
Journal = sicomp,
Note = {The prior paper on Delaunay triangulations with C. Seshadhri was combined with the paper by the other authors that introduced the model.},
Number = 2,
Title = {Self-Improving Algorithms},
Url = {http://arxiv.org/abs/0907.0884},
Volume = 40,
Year = 2011,
Abstract = {
We investigate ways in which an algorithm can improve its expected performance
by fine-tuning itself automatically with respect to an unknown input
distribution $D$. We assume here that $D$ is of product type. More precisely,
suppose that we need to process a sequence $I_1, I_2,\ldots$ of inputs
$ I = (x_1, x_2, \ldots, x_n)$ of some fixed length $n$, where each $x_i$ is drawn independently
from some arbitrary, unknown distribution $D_i$. The goal is to design an
algorithm for these inputs so that eventually the expected running time will
be optimal for the input distribution $D = D_1 * D_2 * ... * D_n$.
We give such self-improving algorithms for two problems: (i) sorting a sequence
of numbers and (ii) computing the Delaunay triangulation of a planar point set.
Both algorithms achieve optimal expected limiting complexity. The algorithms
begin with a training phase during which they collect information about the
input distribution, followed by a stationary regime in which the algorithms
settle to their optimized incarnations.
},
Bdsk-Url-1 = {http://arxiv.org/abs/0907.0884}}
@unpublished{socg_talk,
Author = {Kenneth L. Clarkson},
Nobib = {y},
Note = {Survey talk given at SoCG; please see later slides, which mostly incorporate this material},
Title = {Geometry is Everywhere, Part {XLVII}: Metrics, Nets, Dimensions, and Measures},
Year = {2008}}
@inproceedings{one_pass,
Author = {Kenneth L. Clarkson and David P. Woodruff},
Booktitle = {STOC '09: } # proc # {Forty-First } # stoc,
Dvi = {one_pass/p.dvi},
Pdf = {one_pass/p.pdf},
Ps = {one_pass/p.ps.gz.ps},
Slides = {one_pass/t3/t.xml},
Slidesnote1 = {n},
Slidesnote2 = {n},
Title = {Numerical Linear Algebra in the Streaming Model},
Year = {2009},
Abstract = {
We give near-optimal space bounds in the streaming model for linear algebra
problems that include estimation of matrix products, linear regression,
low-rank approximation, and
approximation of matrix rank. In the streaming model, sketches of input matrices
are maintained under updates of matrix entries; we prove results for
turnstile updates, given in an arbitrary order. We give the
first lower bounds known for the space needed by the sketches, for a given
estimation error $\epsilon$. We sharpen prior upper bounds, with respect to
combinations of
space, failure probability, and number of passes.
The sketch we use for matrix $A$ is simply $S^TA$, where $S$ is a sign matrix.
Our results include the following upper and lower bounds on the bits of
space needed for $1$-pass
algorithms. Here $A$ is an $n\times d$ matrix, $B$ is an $n\times d'$ matrix,
and $c := d+d'$. These results are given for fixed failure probability;
for failure probability $\delta>0$,
the upper bounds require a factor of $\log(1/\delta)$ more space.
We assume the inputs have integer entries specified by
$O(\log(nc))$ bits, or $O(\log(nd))$ bits. The Frobenius matrix norm is
used.
- (Matrix Product)
- Output matrix $C$ with
$$|| A^TB-C || \leq \epsilon ||A|| ||B||.$$
We show that $\Theta(c\epsilon^{-2}\log(nc))$ space is needed.
- (Linear Regression)
- For $d'=1$, so that $B$ is a vector $b$, find $x$ so that
$$||Ax-b|| \leq (1+\epsilon)\min_{x' \in \reals^d} || Ax'-b ||.$$
We show that
$\Theta(d^2\epsilon^{-1}\log(nd))$ space is needed.
- (Rank-$k$ Approximation)
- Find matrix $\hat A_k$ of rank no more than $k$,
so that
$$|| A - \hat A_k|| \leq (1+\epsilon)||A-A_k||,$$
where $A_k$ is the best rank-$k$ approximation to $A$.
Our lower bound is $\Omega(k\epsilon^{-1}(n+d)\log(nd))$ space,
and we give a one-pass algorithm matching this when $A$ is given
row-wise or column-wise. For general updates, we give a one-pass algorithm
needing
$$
O(k\epsilon^{-2}(n + d/\epsilon^2)\log(nd))
$$
space. We also give upper and lower bounds
for algorithms using multiple passes, and a bicriteria
low-rank approximation.
}}
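An added illustration of the abstract's sketch $S^TA$ with a sign matrix $S$, used here to estimate a matrix product; the number of sketch rows m is an illustrative stand-in for the $c\epsilon^{-2}$ bound.

import numpy as np

def sign_sketch_product(A, B, m, seed=1):
    # Estimate A^T B from the sketches S^T A and S^T B, where S is an
    # n x m matrix of +-1/sqrt(m) entries (a sign-matrix sketch).
    # E[(A^T S)(S^T B)] = A^T B; m controls the Frobenius-norm error.
    rng = np.random.default_rng(seed)
    n = A.shape[0]
    S = rng.choice([-1.0, 1.0], size=(n, m)) / np.sqrt(m)
    SA, SB = S.T.dot(A), S.T.dot(B)   # the only state a streaming algorithm would keep
    return SA.T.dot(SB)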
@article{multi_cover,
Appearedin = {SoCG '09: } # proc # {Twenty-Fifth } # socg,
Author = {Chandra Chekuri and Kenneth L. Clarkson and Sariel Har-Peled},
Ayear = {2009},
Journal = talg,
Number = 1,
Pdf = {multi_cover/set_cover.pdf},
Slidesnote1 = {n},
Slidesnote2 = {n},
Title = {On the Set Multi-Cover Problem in Geometric Settings},
Url = {http://arxiv.org/abs/0909.0537},
Volume = 9,
Year = 2012,
Abstract = {
We consider the set multi-cover problem in geometric
settings. Given a set of points $P$ and a collection of
geometric shapes (or sets) $\cal F$, we wish to find a minimum
cardinality subset of $\cal F$ such that each point $p \in
P$ is covered by (contained in) at least $\textrm{d}(p)$
sets. Here $\textrm{d}(p)$ is an integer demand (requirement) for
$p$. When the demands $\textrm{d}(p)=1$ for all $p$, this is
the standard set cover problem. The set cover problem in geometric
settings admits an approximation ratio that is better than that
for the general version. In this paper, we show that similar
improvements can be obtained for the multi-cover problem as well.
In particular, we obtain an $O(\log\ OPT)$ approximation for set
systems of bounded VC-dimension, and an $O(1)$ approximation for
covering points by half-spaces in three dimensions and for some
other classes of shapes.
},
Bdsk-Url-1 = {http://arxiv.org/abs/0909.0537}}
@inproceedings{schema_covering,
Author = {Barna Saha and Ioana Stanoi and Kenneth L. Clarkson},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Booktitle = {ICDE},
Pages = {285--296},
Title = {Schema covering: a step towards enabling reuse in information integration},
Url = {http://dx.doi.org/10.1109/ICDE.2010.5447853},
Year = {2010},
Bdsk-Url-1 = {http://dx.doi.org/10.1109/ICDE.2010.5447853}}
@incollection{graph_anon,
Author = {Kenneth Clarkson and Kun Liu and Evimaria Terzi},
Booktitle = {Link Mining: Models, Algorithms, and Applications},
Editor = {C. Faloutsos and J. Han and P. Yu},
Title = {Towards Identity Anonymization in Social Networks},
Year = {2010}}
@inproceedings{sich,
Author = {Kenneth L. Clarkson and Wolfgang Mulzer and C. Seshadhri},
Booktitle = {SODA 2010: } # proc # {Twenty-First } # soda,
Title = {Self-improving Algorithms for Convex Hulls},
Year = {2010},
Abstract = {
We describe an algorithm for computing planar convex hulls in the
self-improving model: given a sequence $I_1, I_2, \ldots$ of planar $n$-point
sets, the upper convex hull $\conv(I)$ of each set $I$ is desired. We assume
that there exists a probability distribution $D$ on $n$-point sets, such that
the inputs $I_j$ are drawn independently according to $D$. Furthermore, $D$ is
such that the individual points are distributed independently of each other.
In other words, the $i$'th point is distributed according to $D_i$. The
$D_i$'s can be arbitrary but are independent of each other. The distribution
$D$ is not known to the algorithm in advance. After a learning phase of
$n^\epsilon$ rounds, the expected time to compute $\conv(I)$ is $O(n +
H(\conv(I)))$. Here, $H(\conv(I))$ is the entropy of the output, which is a
lower bound for the expected running time of any algebraic computation
tree that computes the convex hull. (More precisely, $H(\conv(I))$ is the
minimum entropy of any random variable that maps $I$ to a description of
$\conv(I)$ and to a labeling scheme that proves nonextremality for every point
in $I$ not on the hull.) Our algorithm is thus asymptotically optimal for $D$.
},
Annote = {Bug found! Please see our later paper.}}
@article{sketchopt,
Appearedin = {FOCS 2010: } # proc # {Fifty-First } # focs,
Author = {Kenneth L. Clarkson and Elad Hazan and David P. Woodruff},
Ayear = 2010,
Demo = {sketchopt/t2/demo/demo.svg},
Journal = jacm,
Number = 5,
Slides = {sketchopt/t2/t.html},
Title = {Sublinear Optimization for Machine Learning},
Url = {http://arxiv.org/abs/1010.4408},
Volume = 59,
Year = 2012,
Abstract = {
We give sublinear-time approximation algorithms for some optimization problems
arising in machine learning, such as training linear classifiers and finding
minimum enclosing balls. Our algorithms can be extended to some kernelized
versions of these problems, such as SVDD, hard margin SVM, and $L_2$-SVM, for
which sublinear-time algorithms were not known before. These new algorithms use
a combination of novel sampling techniques and a new multiplicative update
algorithm. We give lower bounds which show the running times of many of our
algorithms to be nearly best possible in the unit-cost RAM model. We also give
implementations of our algorithms in the semi-streaming setting, obtaining the
first low-pass, polylogarithmic-space, and sublinear-time algorithms
achieving an arbitrary approximation factor.
},
Bdsk-Url-1 = {http://arxiv.org/abs/1010.4408}}
@unpublished{aarhus_school,
Author = {Kenneth L. Clarkson},
Demo = {socg_08/energy_demo/demo.svg},
Nobib = {y},
Note = {Tutorial for MADALGO \& CTIC Summer School on High-Dimensional Geometric Computing, Aarhus University, Denmark},
Slides = {aarhus_school/t.html},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Metric Spaces, Nets, Dimensions, and Search},
Year = {2011},
Month = {August},
Abstract = {
The idea of a metric space is among the most basic of geometric concepts,
and so appears in a great variety of applications and algorithms, sometimes
in disguise. This is a light survey of concepts and constructions
associated with metric spaces, including:
- Metric transformations
- `\epsilon`-nets, the greedy algorithm, and applications
- A non-greedy algorithm and a few non-`\epsilon`-nets
- Box dimension and coping with finiteness
- Definitions of dimension that make sense for finite sets
- Estimation of dimension using finite samples
- Filtrations
- Of random subsets, k-medians, `\epsilon`-nets
- Neighbors, generalizing "Delaunay neighbors"
- For witness complexes and NN searching
- Interpolation
- Interpolation of scattered data, Laplacians
- Interpolation in metric spaces
- A curious approach via "witness stealing"
A general theme here is "coping with finiteness", in trying to
apply concepts developed for infinite sets to a finite setting,
or finding frameworks that apply in both settings. Another theme
is the extent to which some constructions allow the resolution,
or scale of measurement, to be determined by the data.
},
Annote = {
This is yet another version of a long
continuing
interest,
with pointers to work on meshes, and
nearest neighbor searching.
The animations' JavaScript code is not for the squeamish.
}}
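As an added aside, the greedy `\epsilon`-net construction mentioned in the outline can be stated in a few lines for a generic metric; this sweep version is a standard textbook form, not taken from the slides.

def greedy_net(points, dist, eps):
    # Greedy construction of an eps-net in a generic metric: keep any point
    # that is farther than eps from all centers chosen so far.  The result
    # covers every point within eps, and its centers are pairwise > eps apart.
    centers = []
    for p in points:
        if all(dist(p, c) > eps for c in centers):
            centers.append(p)
    return centers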
@unpublished{paris_school,
Author = {Kenneth L. Clarkson},
Demo = {paris_school/demo_centroid/demo.svg},
Nobib = {y},
Note = {Tutorial for kickoff workshop of CG Learning Project},
Slides = {paris_school/t.html},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Geometric Optimization and/or/in Machine Learning, and Related Topics},
Year = {2011},
Month = {June},
Abstract = {
The outline:
- Some geometric problems, algorithms, analysis
- Approximation algorithms
- First order methods (no second derivatives)
- Analysis using on-line convex optimization formalism
- But the problems and algorithms are not on-line
- Fast randomized approximate evaluation of dot products
- Matrix Bernstein and applications
- A primal-dual algorithm
- (Significant) algorithmic results are joint with Elad Hazan and David Woodruff
}}
@article{sime,
Appearedas = {Self-improving algorithms for coordinate-wise maxima},
Appearedin = {SoCG '12: } # proc # {Twenty-Eighth } # socg,
Author = {Kenneth L. Clarkson and Wolfgang Mulzer and C. Seshadhri},
Ayear = {2012},
Title = {Self-improving Algorithms for Coordinate-Wise Maxima and Convex Hulls},
Url = {http://arxiv.org/abs/1211.0952},
Abstract = {
Finding the coordinate-wise maxima and the convex
hull of a planar point set are probably the most
classic problems in computational geometry. We
consider these problems in the self-improving setting.
Here, we have $n$ distributions $\cD_1, \ldots, \cD_n$
of planar points. An input point set $(p_1, \ldots, p_n)$
is generated by taking an independent sample $p_i$
from each $\cD_i$, so the input is distributed
according to the product $\cD = \prod_i \cD_i$.
A self-improving algorithm repeatedly gets inputs
from the distribution $\cD$ (which is a priori
unknown), and it tries to optimize its running time
for $\cD$. The algorithm uses the first few inputs to
learn salient features of the distribution $\cD$, before
it becomes fine-tuned to $\cD$. Let $\OPTMAX_\cD$
(resp. $\OPTCH_\cD$) be the expected depth
of an optimal linear comparison tree computing
the maxima (resp. convex hull) for $\cD$.
Our maxima algorithm eventually achieves expected
running time $O(\OPTMAX_\cD + n)$.
Furthermore, we give a self-improving algorithm for convex hulls
with expected running time $O(\OPTCH_\cD + n\log\log n)$.
Our results require new tools for understanding linear
comparison trees. In particular, we convert
a general linear comparison tree to a restricted
version that can then be related to the running time
of our algorithms. Another interesting feature
is an interleaved search procedure
to determine the likeliest point to be extremal
with minimal computation. This allows our algorithms
to be competitive with the optimal algorithm for
$\cD$.
},
Annote = {Simpler and more verifiable algorithm for convex hulls than
the previous one of ours, whose proof is buggy.
},
Bdsk-Url-1 = {http://arxiv.org/abs/1211.0952}}
@inproceedings{FCT,
Author = {Kenneth L. Clarkson and Petros Drineas and Malik Magdon-Ismail and Michael W. Mahoney and Xiangrui Meng and David P. Woodruff},
booktitle={Proceedings of the Twenty-Fourth Annual ACM-SIAM Symposium on Discrete Algorithms},
organization={SIAM},
Pages = {466--477},
Title = {The Fast {C}auchy Transform and Faster Robust Linear Regression},
Url = {http://arxiv.org/abs/1207.4684},
Year = {2013},
Abstract = {We provide fast algorithms for overconstrained $\ell_p$ regression and
related problems: for an $n\times d$ input matrix $A$ and vector $b\in\R^n$,
in $O(nd\log n)$ time we reduce the problem $\min_{x\in\R^d} \norm{Ax-b}_p$
to the same problem with input matrix $\tilde A$ of dimension $s \times d$
and corresponding $\tilde b$ of dimension $s\times 1$.
Here, $\tilde A$ and $\tilde b$ are a coreset for the
problem, consisting of sampled and rescaled rows of $A$ and $b$;
and $s$ is independent of $n$ and polynomial in $d$.
Our results improve on the best previous algorithms when $n\gg d$, for all
$p\in [1,\infty)$ except $p=2$; in particular, they improve the
$O(nd^{1.376+})$ running time of Sohler and Woodruff (STOC, 2011) for
$p=1$, that uses asymptotically fast matrix multiplication, and the
$O(nd^5\log n)$ time of Dasgupta et al. (SICOMP, 2009) for general
$p$, that uses ellipsoidal rounding.
We also provide a suite of improved results for finding well-conditioned
bases via ellipsoidal rounding, illustrating tradeoffs between running time
and conditioning quality, including a one-pass conditioning algorithm for
general $\ell_p$ problems.
To complement this theory, we provide a detailed empirical evaluation of
implementations of our algorithms for $p=1$, comparing them with several
related algorithms.
Among other things, our empirical results clearly show that, in the
asymptotic regime, the theory is a very good guide to the practical
performance of these algorithms.
Our algorithms use our faster constructions of well-conditioned bases for
$\ell_p$ spaces and, for $p=1$, a fast subspace embedding of independent
interest that we call the Fast Cauchy Transform:
a matrix $\Pi: \R^n\mapsto \R^{O(d\log d)}$, found obliviously to $A$, that
approximately preserves the $\ell_1$ norms:
that is, $\norm{Ax}_1 \approx \norm{\Pi Ax}_1$, for all $x$, with distortion
$O(d^{2+\eta} \log d)$, for an arbitrarily small constant $\eta>0$;
and, moreover, $\Pi A$ can be computed in
$O(nd\log d)$ time.
The techniques underlying our Fast Cauchy Transform include fast
Johnson-Lindenstrauss transforms, low-coherence matrices, and rescaling by
Cauchy random variables.},
Bdsk-Url-1 = {http://arxiv.org/abs/1207.4684}}
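An added sketch showing only the simple dense Cauchy embedding, assumed here as an illustrative precursor and not the Fast Cauchy Transform construction of the paper; the 1/r scaling and the row count r are illustrative.

import numpy as np

def dense_cauchy_sketch(A, r, seed=2):
    # Left-multiply A by an r x n matrix of i.i.d. Cauchy random variables;
    # ||Pi A x||_1 then approximates ||A x||_1 for all x, up to poly(d)
    # distortion.  Dense precursor sketch only, not the FCT itself.
    rng = np.random.default_rng(seed)
    n = A.shape[0]
    Pi = rng.standard_cauchy(size=(r, n)) / r
    return Pi.dot(A)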
@inproceedings{sparse_embed,
Author = {Kenneth L. Clarkson and David P. Woodruff},
Booktitle = stoc,
Note = {Co-winner, Best Paper Award.},
Pages = {81--90},
Slides = {sparse_embed/t.html},
Slidesnote1 = {y},
Title = {Low rank approximation and regression in input sparsity time},
Url = {http://arxiv.org/abs/1207.6365},
Year = {2013},
Abstract = {
We design a new distribution over $\poly(r
\eps^{-1}) \times n$ matrices $S$ so that for any
fixed $n\times d$ matrix $A$ of rank $r$, with
probability at least 9/10, $\norm{SAx}_2 = (1 \pm
\eps)\norm{Ax}_2$ simultaneously for all $x\in\R^d$.
Such a matrix $S$ is called a subspace
embedding. Furthermore, $SA$ can be computed in
$O(\nnz{A})+ \poly(d \eps^{-1})$ time, where
$\nnz{A}$ is the number of non-zero entries of $A$.
This improves over all previous subspace embeddings,
which required at least $\Omega(nd\log d)$ time to
achieve this property. We call our matrices $S$
sparse embedding matrices.
Using our sparse embedding matrices, we obtain the fastest
known algorithms for $(1+\eps)$-approximation for
overconstrained least-squares regression, low-rank
approximation, approximating all leverage scores, and
$\ell_p$-regression. The leading order term in the time
complexity of our algorithms is $O(\nnz{A})$ or
$O(\nnz{A}\log n)$. We optimize the low-order
$\poly(d/\eps)$ terms in our running times (or for rank-$k$
approximation, the $n\cdot\poly(k/\eps)$ term), and show various
tradeoffs. For instance, we also use our methods to design
new preconditioners that improve the dependence on $\eps$ in
least squares regression to $\log 1/\eps$. Finally, we
provide preliminary experimental results which suggest that
our algorithms are competitive in practice. },
Annote = {See also Jelani Nelson and Huy L. Nguyen's OSNAP work,
and a related paper
by Xiangrui Meng and Michael Mahoney.},
Bdsk-Url-1 = {http://arxiv.org/abs/1207.6365}}
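An added sketch of applying a sparse embedding (CountSketch-style) matrix as described in the abstract: each row of A gets one random sign and is hashed to one of m output rows, so SA is formed in one pass over the nonzeros; m is left as a free parameter.

import numpy as np

def sparse_embed(A, m, seed=3):
    # Apply a sparse embedding matrix S with a single +-1 per column:
    # each input row is hashed to one of m output rows with a random sign.
    rng = np.random.default_rng(seed)
    n, d = A.shape
    rows = rng.integers(0, m, size=n)          # hash bucket for each input row
    signs = rng.choice([-1.0, 1.0], size=n)
    SA = np.zeros((m, d))
    for i in range(n):
        SA[rows[i]] += signs[i] * A[i]
    return SA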
@inproceedings{sample_sketch,
Author = {Kenneth L. Clarkson and David P. Woodruff},
booktitle={Proceedings of the Twenty-Sixth Annual ACM-SIAM Symposium on Discrete Algorithms},
pages={921--939},
year={2015},
organization={SIAM},
Title = {Sketching for ${M}$-Estimators: A Unified Approach to Robust Regression},
Pdf = {sample_sketch/p.pdf},
slides={sample_sketch/t2/t.html},
Slidesnote1 = {n},
Slidesnote2 = {n},
Abstract = {
We give algorithms for the $M$-estimators $\min_x \norm{Ax-b}_G$, where $A\in\R^{n\times d}
$ and $b\in\R^n$, and $\norm{y}_G$ for $y\in\R^n$ is specified by a cost function $G:\R\mapsto \R^{\geq 0}$,
with $\norm{y}_G \equiv \sum_i G(y_i)$. The $M$-estimators generalize $\ell_p$ regression, for which
$G(x)=|x|^p$. We first show that the Huber measure can be computed up to relative error $
\epsilon$ in $O(\nnz{A}\log n+ \poly(d (\log n)/\eps))$ time, where $\nnz{A}$ denotes the number of
non-zero entries of the matrix $A$. Huber is arguably the most widely used $M$-estimator, enjoying
the robustness properties of $\ell_1$ as well as the smoothness properties of $\ell_2$.
We next develop algorithms for general $M$-estimators. We analyze the $M$-sketch, which is
a variation of a sketch introduced by Verbin and Zhang in the context of estimating the earthmover
distance. We show that the $M$-sketch can be used much more generally for sketching any
$M$-estimator, provided $G$ has growth that is at least linear and at most quadratic.
Using the $M$-sketch we solve the $M$-estimation problem in $O(\nnz{A} + \poly(d \log n))$ time for
any such $G$ that is convex, making a single pass over the matrix and finding a solution whose
residual error is within a constant factor of optimal, with high probability.
},
Annote = {The results for regression are generalized in the following paper.},
}
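An added example of the abstract's measure $\norm{y}_G = \sum_i G(y_i)$ for one common normalization of the Huber cost; the threshold tau is an illustrative parameter.

import numpy as np

def huber_measure(y, tau=1.0):
    # ||y||_G = sum_i G(y_i) with G quadratic below the threshold tau and
    # linear above it, one standard normalization of the Huber cost.
    a = np.abs(y)
    return float(np.sum(np.where(a <= tau, a**2 / (2 * tau), a - tau / 2)))

# the residual vector y = A x - b would be plugged in when evaluating an M-estimator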
@unpublished{g2s3,
Author = {Kenneth L. Clarkson},
Nobib = {y},
Note = {Tutorial for Gene Golub Summer School},
Slides = {http://kenclarkson.org/G2S3_2015/},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Sketching for Matrix Computations},
Year = {2015},
Month = {July},
Abstract = {
The outline:
}
}
@inproceedings{L1L2,
Author = {Kenneth L. Clarkson and David P. Woodruff},
booktitle={Foundations of Computer Science (FOCS), 2015 IEEE 56th Annual Symposium on},
pages={310--329},
year={2015},
organization={IEEE},
Slides={L1L2/t/},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Input Sparsity and Hardness for Robust Subspace Approximation},
Url = {http://arxiv.org/abs/1510.06073},
Year = {2015},
Abstract = {
In the subspace approximation problem, we seek a $k$-dimensional subspace $F$ of $\R^d$ that minimizes
the sum of $p$-th powers of Euclidean distances to a given set of $n$ points $a_1, \ldots, a_n \in \R^d$,
for $p \geq 1$.
More generally than minimizing $\sum_i \dist(a_i,F)^p$,
we may wish to minimize $\sum_i M(\dist(a_i,F))$ for some loss function $M()$, for example, $M$-Estimators, which
include the Huber and Tukey loss functions.
Such subspaces provide alternatives
to the singular value decomposition (SVD), which is the $p=2$ case, finding
such an $F$ that minimizes the sum of squares of distances. For $p \in [1,2)$,
and for typical $M$-Estimators, the minimizing $F$ gives a solution that is more robust to
outliers than that provided by the SVD.
We give several algorithmic results for these robust subspace approximation problems.
We state our results as follows, thinking of the $n$ points as forming an $n \times d$ matrix $A$, and
letting $\nnz{A}$
denote the number of non-zero entries of $A$.
Our results hold for $p\in [1,2)$.
We use $\poly(n)$ to denote $n^{O(1)}$ as $n\rightarrow\infty$.
- For minimizing $\sum_i \dist(a_i,F)^p$,
we give an algorithm running in
\[
O(\nnz{A} + (n+d)\poly(k/\eps) + \exp(\poly(k/\eps)))
\]
time which outputs a $k$-dimensional
subspace $F$ whose cost is at most a $(1+\eps)$-factor larger than the optimum.
- We show that the problem of minimizing $\sum_i \dist(a_i, F)^p$
is NP-hard, even to output a $(1+1/\poly(d))$-approximation. This extends work of Deshpande et al. (SODA, 2011) which could only
show NP-hardness or UGC-hardness for $p > 2$; their proofs critically rely on $p > 2$. Our work resolves an open question
of [Kannan Vempala, NOW, 2009]. Thus, there cannot be an algorithm running in time polynomial in $k$ and $1/\eps$ unless P = NP.
Together with prior work, this implies that the problem is NP-hard for all $p \neq 2$.
- For loss functions for a wide class of $M$-Estimators, we give a problem-size reduction:
for a parameter $K=(\log n)^{O(\log k)}$, our reduction takes
\[
O(\nnz{A}\log n + (n+d)\poly(K/\eps))
\]
time to reduce the problem to a constrained version involving matrices whose dimensions
are $\poly(K\eps^{-1}\log n)$. We also give bicriteria solutions.
- Our techniques lead to the first $O(\nnz{A} + \poly(d/\eps))$ time algorithms for $(1+\eps)$-approximate
regression for a wide class of convex $M$-Estimators.
This improves prior results, which were $(1+\eps)$-approximation for Huber regression only,
and $O(1)$-approximation for a general class of $M$-Estimators.
}
}
@unpublished{bc60,
Author = {Kenneth L. Clarkson},
Nobib = {y},
Annote = {Talk for Bernard's birthday; I didn't talk about everything in the abstract.},
Slides = {http://kenclarkson.org/60BC/},
demo = {60BC/demo/NRG2.html},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {The Thrill Goes On: Bernard Yesterday and Today},
Year = {2016},
Month = {July},
Abstract = {
From the excruciatingly difficult to the achingly elegant, Bernard
Chazelle's work on algorithms, especially geometric or natural ones, has
been profoundly influential. I'll sketch a few examples that have been
inspiring to me, including 1-dimensional range queries, low-stabbing
spanning trees, high-order Voronoi diagram construction, deterministic
constructions, and the s-energy of a system.
}
}
@unpublished{self_improve_general,
Author = {Kenneth L. Clarkson},
Nobib = {y},
Annote={This work was done 2008-2012 by Nir Ailon, Bernard Chazelle, Ding Liu, Wolfgang Mulzer, C. Seshadhri, and the speaker.},
Slides = {http://kenclarkson.org/self_improve_general/t_simons/},
Slidesnote1 = {y},
Slidesnote2 = {y},
Title = {Self-improving algorithms},
Year = {2016},
Month = {November},
Abstract = {
We investigate algorithms that learn to run faster: given a series of instances of a computational problem, these algorithms use characteristics of past instances to sharpen their performance for new instances. We have found such *self-improving* algorithms for sorting a list of numbers, and for some problems on planar point sets: computing Delaunay triangulations, coordinate-wise maxima, and convex hulls. Under the assumption that input instances are random variables, each algorithm begins with a training phase during which it adjusts itself to the distribution of the input instances; this is followed by a stationary regime in which the algorithm runs in its optimized version. In this setting, an algorithm must take expected time proportional to the input size `n`, and to the entropy `E` of the output: the input must be touched, and the output must be communicated. Our algorithms achieve `O(n+E)` expected running times in the stationary regime for all the problems mentioned except convex hulls, where `O(n\log\log n + E)` expected time is needed.
}
}
@ARTICLE{krr,
author = {Haim Avron and Kenneth L. Clarkson and David P. Woodruff},
title = {Faster Kernel Ridge Regression Using Sketching and Preconditioning},
journal = {ArXiv e-prints},
url = {https://arxiv.org/abs/1611.03220},
primaryClass = "cs.NA",
keywords = {Computer Science - Numerical Analysis, Computer Science - Data Structures and Algorithms, Computer Science - Learning, Mathematics - Numerical Analysis},
year = 2016,
month = Nov,
abstract = {
Kernel Ridge Regression (KRR) is a simple yet powerful technique for
non-parametric regression whose computation amounts to solving a linear
system. This system is usually dense and highly ill-conditioned. In addition,
the dimensions of the matrix are the same as the number of data points, so
direct methods are unrealistic for large-scale datasets. In this paper, we
propose a preconditioning technique for accelerating the solution of the
aforementioned linear system. The preconditioner is based on random feature
maps, such as random Fourier features, which have recently emerged as a
powerful technique for speeding up and scaling the training of kernel-based
methods, such as kernel ridge regression, by resorting to approximations.
However, random feature maps only provide crude approximations to the kernel
function, so delivering state-of-the-art results by directly solving the
approximated system requires the number of random features to be very large.
We show that random feature maps can be much more effective in forming
preconditioners, since under certain conditions a not-too-large number of
random features is sufficient to yield an effective preconditioner. We
empirically evaluate our method and show it is highly effective for datasets
of up to one million training examples.
}
}
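An added sketch of a random Fourier feature map for the Gaussian kernel, the kind of feature map the abstract proposes for building a preconditioner; the bandwidth sigma and feature count s are illustrative, and the preconditioned solver itself is not shown.

import numpy as np

def random_fourier_features(X, s, sigma=1.0, seed=4):
    # Map X (n x d) to s random Fourier features approximating the Gaussian
    # kernel k(x, y) = exp(-||x - y||^2 / (2 sigma^2)), so that Z Z^T ~ K.
    rng = np.random.default_rng(seed)
    d = X.shape[1]
    W = rng.standard_normal((d, s)) / sigma
    b = rng.uniform(0.0, 2.0 * np.pi, size=s)
    return np.sqrt(2.0 / s) * np.cos(X.dot(W) + b)

# Z = random_fourier_features(X, s); Z Z^T + lam*I can then precondition K + lam*I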
@inproceedings{psdSoda,
title={Low-rank {PSD} approximation in input-sparsity time},
author={Kenneth L. Clarkson and David P. Woodruff},
booktitle= {SODA '17: } # proc # {Twenty-Eighth } # soda,
pages={2061--2072},
year={2017},
pdf = {http://kenclarkson.org/psdSoda/p.pdf},
Slides = {http://kenclarkson.org/psdSoda/index.html},
Slidesnote1 = {y},
Slidesnote2 = {y},
organization={Society for Industrial and Applied Mathematics},
abstract = {
We give algorithms for approximation by low-rank positive semidefinite (PSD) matrices.
For symmetric input matrix $A\in\R^{n\times n}$, target rank $k$, and error parameter $\eps>0$, one algorithm
finds with constant probability a PSD matrix $\tY$ of rank $k$ such that $\norm{A-\tY}_F^2\le (1+\eps)\norm{A - A_{k,+}}_F^2$,
where $A_{k,+}$ denotes the best rank-$k$ PSD approximation to $A$, and the norm
is Frobenius. The algorithm takes time
$O(\nnz{A}\log n) + n\poly((\log n)k/\eps) + \poly(k/\eps)$, where $\nnz{A}$ denotes the number of nonzero entries
of $A$, and $\poly(k/\eps)$ denotes a polynomial in $k/\eps$. (There are two different polynomials in the time bound.)
Here the output matrix $\tY$ has the form $CUC^\top$, where the $O(k/\eps)$ columns of $C$ are columns of $A$.
In contrast to prior work, we do not require the input matrix $A$ to be PSD, our output is rank $k$ (not larger), and
our running time is $O(\nnz{A}\log n)$ provided this is larger than $n \poly((\log n)k/\epsilon)$.
We give a similar algorithm that is faster and simpler, but whose rank-$k$ PSD output does not involve columns
of $A$, and does not require $A$ to be symmetric. We
give similar algorithms for best rank-$k$ approximation subject to the constraint of symmetry.
We also show that there are asymmetric input matrices that cannot have good symmetric column-selected approximations.
}
}
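For comparison, an added sketch of the exact but slow, eigendecomposition-based best rank-k PSD approximation $A_{k,+}$ that the abstract's algorithm approximates.

import numpy as np

def best_rank_k_psd(A, k):
    # Best rank-k PSD approximation of a symmetric A in Frobenius norm:
    # keep the k largest eigenvalues, clamped at zero.
    w, V = np.linalg.eigh(A)                 # eigenvalues in ascending order
    w = np.maximum(w, 0.0)                   # drop negative directions
    idx = np.argsort(w)[::-1][:k]            # top k of the clamped spectrum
    return (V[:, idx] * w[idx]).dot(V[:, idx].T)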
@inproceedings{ridge_reg,
title = {Sharper Bounds for Regularized Data Fitting},
author = {Haim Avron and Kenneth L. Clarkson and David P. Woodruff},
booktitle = {RANDOM '17: 21st International Workshop on Randomization and Computation},
url = {https://arxiv.org/abs/1611.03225},
year = {2017},
abstract = {
The technique of matrix sketching, such as the use of random projections,
has been shown in recent years to be a powerful tool for accelerating many important
statistical learning techniques. Research has so far focused largely on using
sketching for the ``vanilla'' un-regularized versions of these techniques.
Here we study sketching methods for regularized variants of linear regression,
low rank approximations, and canonical correlation analysis. We study regularization
both in a fairly broad setting, and in the specific context of the popular and widely
used technique of ridge regularization; for the latter, as applied to each of these
problems, we show algorithmic resource bounds in which the
{\em statistical dimension} appears in places where in previous bounds
the rank would appear.
The statistical dimension is always smaller than the rank, and decreases as
the amount of regularization increases. In particular, for the ridge low-rank
approximation problem
$\min_{Y,X} \norm{YX - A}_F^2 + \lambda\norm{Y}_F^2 + \lambda\norm{X}_F^2$,
where $Y\in\R^{n\times k}$ and $X\in\R^{k\times d}$,
we give an approximation algorithm needing
\[
O(\nnz{A}) + \tO((n+d)\eps^{-1}k \min\{k, \eps^{-1}\sd_\lambda(Y^*)\})+ \tO(\eps^{-8} \sd_\lambda(Y^*)^3)
\]
time, where $\sd_{\lambda}(Y^*)\le k$ is the statistical dimension of $Y^*$, $Y^*$ is an optimal $Y$,
$\eps$ is an error parameter, and $\nnz{A}$ is the number of nonzero entries of $A$.
We also study regularization in a much more general setting. For example, we obtain
sketching-based algorithms for the low-rank approximation problem
$\min_{X,Y} \norm{YX - A}_F^2 + f(Y,X)$
where $f(\cdot,\cdot)$ is a regularizing function satisfying some very general
conditions (chiefly, invariance under orthogonal transformations).
}
}
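An added one-liner for the statistical dimension appearing in the bounds above, using its standard definition $\sd_\lambda(A) = \sum_i \sigma_i^2/(\sigma_i^2+\lambda)$.

import numpy as np

def statistical_dimension(A, lam):
    # sd_lambda(A) = sum_i sigma_i^2 / (sigma_i^2 + lambda); always at most
    # rank(A), and it shrinks as the regularization lambda grows.
    s = np.linalg.svd(A, compute_uv=False)
    return float(np.sum(s**2 / (s**2 + lam)))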