\documentclass[12pt]{article}
\usepackage{graphicx}
\usepackage{amsmath,amssymb}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{scrextend}
\usepackage{bbm}
\usepackage[hyphens]{url}
\usepackage{hyperref}
\newfont{\bssdoz}{cmssbx10 scaled 1200}
\newfont{\bssten}{cmssbx10}
\oddsidemargin 0in \evensidemargin 0in \topmargin -0.5in
\headheight 0.25in \headsep 0.25in
\textwidth 6.5in \textheight 9in %\marginparsep 0pt \marginparwidth 0pt
\parskip 1.5ex \parindent 0ex \footskip 20pt
\begin{document}
\parindent 0ex
\thispagestyle{empty} \vspace*{-0.75in}
{\bssdoz CME 323: Distributed Algorithms and Optimization}
{\\ \bssten Instructor: Reza Zadeh (rezab@stanford.edu) }
{\\ \bssten HW\#4 - Due Thursday, June 6}
\vspace{5pt}
\begin{enumerate}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item \textbf{Shallow Graphs} For an undirected graph $G = (V,E)$ with $n$
vertices and $m$ edges $(m \geq n)$, we say that $G$ is shallow
if for every pair of vertices $u,v \in V$, there is a path from $u$
to $v$ of length at most 2 (i.e. using at most two edges).
\begin{enumerate}
\item[(a)] Give an algorithm that can decide whether $G$ is shallow in $O(n^{2.376})$ time.
\item[(b)] Given an $n\times r$ matrix $A$ and an $r \times n$ matrix $B$ where
$r \leq n$, show that we can multiply $A$ and $B$ in
$O\left( (n/r)^{2} r^{2.376}\right)$ time. Hint: use the fact that we can multiply
two $r \times r$ matrices in $O(r^{2.376})$ time.
\item[(c)] Give an algorithm that can decide whether $G$ is shallow in
$O(m^{0.55} n^{1.45})$ time. Hint: consider length-2 paths that go from low-degree
vertices and length-2 paths that go through high-degree vertices separately.
Use result from part (b). \end{enumerate}
\item Write a Spark program to compute the Singular Value Decomposition of the following $10 \times 3$ matrix:
\begin{verbatim}
-0.5529181 -0.5465480 0.009519836
-0.5428579 -1.5623879 0.982464609
-1.3038629 0.5715549 0.499441144
0.6564096 1.1806877 0.495705999
-1.2061171 1.3430651 0.153477135
0.2938439 -1.7966043 0.914381381
-0.2578953 0.2596407 0.815623895
0.9659582 2.3697927 0.320880634
-0.4038109 0.9846071 0.488856619
0.6029003 -0.3202214 0.380347546 \end{verbatim}
Assume the matrix is tall and skinny, so the rows should be split up and inserted into an RDD. Each row can fit in memory on a single machine. Report
all singular vectors and values and submit your Spark program.
\item Given a matrix $M$ in row format as an \textsc{RDD[Array[Double]]} and a local vector $x$ given as an $\textsc{Array[Double]}$, give
Spark code to compute the matrix vector multiply $Mx$.
\item In class we saw how to compute highly similar pairs of $m$-dimensional
vectors $x, y$ via sampling in the mappers, where the similarity was defined by
cosine similarity: $\frac{x^T y}{|x|_2 |y|_2}$. Show how to modify the sampling
scheme to work with overlap similarity, defined as $$\text{overlap}(x,y)=\frac{x^T y} {\min(|x|_2^2, |y|_2^2)}$$
\begin{enumerate}
\item Prove shuffle size is still independent of $m$, the dimension of $x$ and $y$.
\item Assuming combiners are used with $B$ mapper machines, analyze the shuffle size.
\end{enumerate}
\end{enumerate}
\end{document}