\documentclass[12pt]{article}

\usepackage{graphicx}
\usepackage{amsmath,amssymb}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{scrextend}
\usepackage{bbm}
\usepackage[hyphens]{url}
\usepackage{hyperref}


\newfont{\bssdoz}{cmssbx10 scaled 1200}
\newfont{\bssten}{cmssbx10}


\oddsidemargin  0in \evensidemargin 0in \topmargin -0.5in
\headheight 0.25in \headsep 0.25in
\textwidth   6.5in \textheight 9in %\marginparsep 0pt \marginparwidth 0pt
\parskip 1.5ex  \parindent 0ex \footskip 20pt

\begin{document}
\parindent 0ex
\thispagestyle{empty} \vspace*{-0.75in}
{\bssdoz CME 323: Distributed Algorithms and Optimization}
{\\ \bssten Instructor: Reza Zadeh (rezab@stanford.edu) }
{\\ \bssten HW\#4 - Due Thursday, June 6}
\vspace{5pt}

\begin{enumerate}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


  \item \textbf{Shallow Graphs} For an undirected graph $G = (V,E)$ with $n$
      vertices and $m$ edges $(m \geq n)$, we say that $G$ is shallow
      if for every pair of vertices $u,v \in V$, there is a path from $u$
      to $v$ of length at most 2 (i.e. using at most two edges).

      \begin{enumerate}         
      \item[(a)] Give an algorithm that can decide whether $G$ is shallow in $O(n^{2.376})$ time.
        \item[(b)] Given an $n\times r$ matrix $A$ and an $r \times n$ matrix $B$ where
          $r \leq n$, show that we can multiply $A$ and $B$ in
          $O\left( (n/r)^{2} r^{2.376}\right)$ time. Hint: use the fact that we can multiply
          two $r \times r$ matrices in $O(r^{2.376})$ time.
        \item[(c)] Give an algorithm that can decide whether $G$ is shallow in 
          $O(m^{0.55} n^{1.45})$ time. Hint: consider length-2 paths that go from low-degree
          vertices and length-2 paths that go through high-degree vertices separately.
          Use result from part (b).       \end{enumerate}


\item Write a Spark program to compute the Singular Value Decomposition of the following $10 \times 3$ matrix:

  \begin{verbatim}
  -0.5529181 -0.5465480 0.009519836 
  -0.5428579 -1.5623879 0.982464609 
  -1.3038629  0.5715549 0.499441144 
   0.6564096  1.1806877 0.495705999 
  -1.2061171  1.3430651 0.153477135 
   0.2938439 -1.7966043 0.914381381 
  -0.2578953  0.2596407 0.815623895 
   0.9659582  2.3697927 0.320880634 
  -0.4038109  0.9846071 0.488856619 
   0.6029003 -0.3202214 0.380347546   \end{verbatim}

Assume the matrix is tall and skinny, so the rows should be split up and inserted into an RDD. Each row can fit in memory on a single machine. Report
all singular vectors and values and submit your Spark program.


\item Given a matrix $M$ in row format as an \textsc{RDD[Array[Double]]} and a local vector $x$ given as an $\textsc{Array[Double]}$, give
Spark code to compute the matrix vector multiply $Mx$. 


\item In class we saw how to compute highly similar pairs of $m$-dimensional 
vectors $x, y$ via sampling in the mappers, where the similarity was defined by 
cosine similarity: $\frac{x^T y}{|x|_2 |y|_2}$. Show how to modify the sampling
scheme to work with overlap similarity, defined as $$\text{overlap}(x,y)=\frac{x^T y} {\min(|x|_2^2, |y|_2^2)}$$ 
\begin{enumerate}
\item Prove shuffle size is still independent of $m$, the dimension of $x$ and $y$.
\item Assuming combiners are used with $B$ mapper machines, analyze the shuffle size.
\end{enumerate}


\end{enumerate}
\end{document}
