#!/bin/bash

# Locate the real file behind this script. SOURCE ends up as the path of the
# script after following every symlink; SCRIPT_DIR is the physical directory
# that contains it.
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
  DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
  SOURCE="$(readlink "$SOURCE")"
  # a relative symlink target is relative to the directory holding the symlink
  [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
done
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"

# The installation root is two directory levels above the script directory.
ML_SERVER_ROOT="${SCRIPT_DIR}/../.."
# Name of the directory that contains SCRIPT_DIR. Every expansion is quoted so
# that install paths containing whitespace survive.
# NOTE(review): this is one level below ML_SERVER_ROOT, so it is the name of
# the parent of SCRIPT_DIR rather than of the root itself -- confirm that this
# is what consumers of ML_SERVER_VERSION expect.
ML_SERVER_VERSION="$(basename "$(dirname "${SCRIPT_DIR}")")"

# Prepend the bundled PythonServer libraries. The ':' separator is added only
# when PYTHONPATH already has content: a trailing ':' would leave an empty
# entry, which Python resolves to the current working directory.
export PYTHONPATH="${ML_SERVER_ROOT}/libraries/PythonServer${PYTHONPATH:+:${PYTHONPATH}}"

# Optional site initialisation living next to the script directory.
if [ -e "${SCRIPT_DIR}/../Revo-init" ] ; then
  source "${SCRIPT_DIR}/../Revo-init"
fi


# Locate the Spark installation and export it as SPARK_HOME.
#
# Nothing is done when SPARK_HOME already names an existing directory.
# Otherwise candidates are tried in this order:
#   1. the directory two levels above a spark-submit found on PATH or, failing
#      that, listed by an installed rpm/dpkg package whose name contains
#      "spark";
#   2. the HDP client directories under /usr/hdp/current (SPARK_MAJOR_VERSION
#      selects one when it is set);
#   3. the last matching Cloudera SPARK2 parcel directory.
# Steps 2 and 3 only run while SPARK_HOME is empty or lacks conf/ and yarn/.
#
# Globals:   SPARK_HOME (read, written, exported), SPARK_MAJOR_VERSION (read)
# Arguments: none
function find_spark_home()
{
  local spark_bin rpm_bin dpkg_bin spark_packages found candidate num_spark parcel

  if [ ! -d "${SPARK_HOME}" ] ; then
    # type -P searches PATH only and needs no external 'which' binary
    spark_bin=$(type -P spark-submit)
    if [ -z "${spark_bin}" ] ; then
      rpm_bin=$(type -P rpm)
      if [ -n "${rpm_bin}" ] ; then
        spark_packages=$("${rpm_bin}" -qa 2>/dev/null | grep spark)
        if [ -n "${spark_packages}" ] ; then
          # Several packages may match; the list is deliberately left unquoted
          # so each package name becomes its own argument.
          # shellcheck disable=SC2086
          spark_bin=$("${rpm_bin}" -ql ${spark_packages} 2>/dev/null | grep -m 1 'spark-submit$')
        fi
      fi

      # handle ubuntu os
      dpkg_bin=$(type -P dpkg-query)
      if [ -n "${dpkg_bin}" ] ; then
        spark_packages=$("${dpkg_bin}" -W -f='${Package}\n' 2>/dev/null | grep spark)
        if [ -n "${spark_packages}" ] ; then
          # shellcheck disable=SC2086
          found=$("${dpkg_bin}" -L ${spark_packages} 2>/dev/null | grep -m 1 'spark-submit$')
          # only override when dpkg really lists a spark-submit, so that an
          # earlier rpm hit is not wiped out by an empty result
          if [ -n "${found}" ] ; then
            spark_bin="${found}"
          fi
        fi
      fi
    fi
    if [ -n "${spark_bin}" ]; then
      # quoted so that a path containing whitespace stays a single argument
      spark_bin="$( readlink -f "${spark_bin}" 2>/dev/null )"
      SPARK_HOME="$( readlink -m "${spark_bin}/../../" 2>/dev/null )"
    fi

    if [ -z "${SPARK_HOME}" ] || [ ! -d "${SPARK_HOME}/conf" ] || [ ! -d "${SPARK_HOME}/yarn" ] ; then
      # if the SPARK_HOME found is not as expected. Then try HDP specific logic first
      # reference logic in HDP /usr/bin/spark-script-wrapper.sh
      if [ -z "${SPARK_MAJOR_VERSION}" ]; then
        num_spark=0
        # the glob replaces parsing 'ls' output; an unmatched pattern stays
        # literal and is rejected by the -d test
        for candidate in /usr/hdp/current/spark*-client; do
          if [ -d "${candidate}" ]; then
            num_spark=$(( num_spark + 1 ))
            SPARK_HOME="${candidate}"
          fi
        done

        # more than one client installed: settle on the spark2 client
        if [ "${num_spark}" -gt 1 ]; then
          SPARK_HOME="/usr/hdp/current/spark2-client"
        fi

      elif [ "${SPARK_MAJOR_VERSION}" -eq "1" ]; then
        SPARK_HOME="/usr/hdp/current/spark-client"
      elif [ "${SPARK_MAJOR_VERSION}" -eq "2" ]; then
        SPARK_HOME="/usr/hdp/current/spark2-client"
      fi
    fi

    if [ -z "${SPARK_HOME}" ] || [ ! -d "${SPARK_HOME}/conf" ] || [ ! -d "${SPARK_HOME}/yarn" ] ; then
      # if the SPARK_HOME found is still not as expected. Then try CDH specific logic
      # pick the last as SPARK HOME if multiple SPARK 2 versions exist
      # /opt/cloudera/parcels is Cloudera Manager's default parcel directory;
      # the /log/cloudera/parcels location searched previously is kept (with
      # lower precedence) so existing setups keep resolving.
      for parcel in /log/cloudera/parcels/SPARK2* /opt/cloudera/parcels/SPARK2*; do
        # if is a non-symlink dir
        if [ -d "${parcel}" ] && [ ! -L "${parcel}" ] ; then
          SPARK_HOME="${parcel}/lib/spark2"
        fi
      done
    fi
    export SPARK_HOME
  fi
}

# Export the environment PySpark needs in order to run through this launcher.
#
# Globals read:    SOURCE, SCRIPT_DIR, SPARK_HOME, REVOSCALEPY_SPARK_JAR
# Globals written: PYSPARK_PYTHON, PYTHONPATH, SPARK_SUBMIT_OPTS (all exported)
# Arguments:       none
# Exits with status 255 when SPARK_SUBMIT_OPTS already carries a -Dspark.jars=
# setting that does not mention scaler-spark_2.11-0.1.0.jar.
function set_pyspark_interop()
{
  local launcher py4j extra_opts

  # for pyspark interop, make sure pyspark point to mlserver-python
  # SOURCE is relative when the script was started through a relative path;
  # anchor it to SCRIPT_DIR so the value does not depend on the working
  # directory of whoever reads PYSPARK_PYTHON later.
  launcher="${SOURCE}"
  if [[ -n "${SOURCE}" && "${SOURCE}" != /* && -n "${SCRIPT_DIR}" ]]; then
    launcher="${SCRIPT_DIR}/${SOURCE##*/}"
  fi
  export PYSPARK_PYTHON="${launcher}"

  # find pyspark lib to include
  if [ -d "${SPARK_HOME}/python" ] ; then
    export PYTHONPATH="${SPARK_HOME}/python${PYTHONPATH:+:${PYTHONPATH}}"
  fi

  # find py4j lib to include
  # Only one archive may be used: with several matches the raw multi-line
  # find output is not a file name, so take the first one in sorted order.
  py4j="$(find "${SPARK_HOME}/python/lib/" -maxdepth 1 -type f -name "py4j*src.zip" 2> /dev/null \
    | LC_ALL=C sort | head -n 1)"
  if [ -f "${py4j}" ] ; then
    export PYTHONPATH="${py4j}${PYTHONPATH:+:${PYTHONPATH}}"
  fi

  # for worker node, also need to let it know about scaler-spark_2.11-0.1.0.jar
  if [ -n "${SPARK_SUBMIT_OPTS}" ]; then
    if [[ "${SPARK_SUBMIT_OPTS}" == *"-Dspark.jars="* ]]; then
      if [[ "${SPARK_SUBMIT_OPTS}" != *"scaler-spark_2.11-0.1.0.jar"* ]]; then
        # diagnostics go to stderr; stdout belongs to the Python program
        echo "SPARK_SUBMIT_OPTS is set while ${REVOSCALEPY_SPARK_JAR} is not include in ${SPARK_SUBMIT_OPTS} -Dspark.jars field." >&2
        exit 255
      fi
      # the caller already lists the scaler jar: keep the options unchanged
      extra_opts="${SPARK_SUBMIT_OPTS}"
    else
      extra_opts="-Dspark.jars=${REVOSCALEPY_SPARK_JAR} ${SPARK_SUBMIT_OPTS}"
    fi
  else
    extra_opts="-Dspark.jars=${REVOSCALEPY_SPARK_JAR}"
  fi

  # for worker node, it might not be able to get PYTHONPATH from SPARK_HOME
  # then rely on spark passing config from master node
  export SPARK_SUBMIT_OPTS="-Dspark.executorEnv.PYTHONPATH=${PYTHONPATH} ${extra_opts}"
}

# Replace the current process with the bundled Python interpreter.
#
# When the scaler Spark jar is present under the installation, the Spark /
# PySpark environment is prepared first via find_spark_home and
# set_pyspark_interop.
#
# Globals:   SCRIPT_DIR, ML_SERVER_ROOT (read); REVOSCALEPY_SPARK_JAR (written,
#            read by set_pyspark_interop)
# Arguments: passed through unchanged to the interpreter
# Returns:   does not return when exec succeeds
function start_python()
{
  # paths are quoted so an installation directory with whitespace still works
  REVOSCALEPY_SPARK_JAR="$( readlink -m "${SCRIPT_DIR}/../../libraries/common/hadoop/jar/scaler-spark_2.11-0.1.0.jar" )"
  if [ -f "${REVOSCALEPY_SPARK_JAR}" ] ; then
    # If scaler jar exists (mrs-hadoop is installed), then setup pyspark interop
    find_spark_home
    set_pyspark_interop
  fi
  exec "${ML_SERVER_ROOT}/runtime/python/bin/python" "$@"
}

# Create (or replace) the mlserver-python symlink pointing at this
# installation's launcher, ${ML_SERVER_ROOT}/bin/python/python.
#
# Globals:   ML_SERVER_ROOT (read)
# Arguments: $1 - optional link path, defaults to /usr/bin/mlserver-python
# Outputs:   INFO/FATAL progress lines on stdout
# Returns:   0 on success; 1 when not running as root or when the existing
#            entry cannot be removed or the link cannot be created
function symlink_python()
{
  local link_path="${1:-/usr/bin/mlserver-python}"
  local python_path="${ML_SERVER_ROOT}/bin/python/python"
  local python_link

  if [[ $(id -u) -ne 0 ]]; then
    echo "FATAL: Sudo or Root permissions are needed to link mlserver"
    return 1
  fi

  # -L is tested as well as -e: a dangling symlink fails every test that
  # follows links, yet it still occupies the name and would make ln -s fail.
  if [[ -L "${link_path}" || -e "${link_path}" ]]; then
    python_link=$(readlink -f "${link_path}")
    echo "INFO: Found ${link_path} => ${python_link}"
    echo "INFO: Replacing ${link_path} with ${python_path}"
    rm -f -- "${link_path}" || return 1
  else
    echo "INFO: Creating symlink ${link_path} => ${python_path}"
  fi

  ln -s -- "${python_path}" "${link_path}" || return 1
  return 0
}

# Remove the mlserver-python symlink if it is present.
#
# Arguments: $1 - optional link path, defaults to /usr/bin/mlserver-python
# Outputs:   INFO/FATAL progress lines on stdout
# Returns:   0 when the link is gone afterwards (including when it never
#            existed); 1 when not running as root or when removal fails
function unsymlink_python()
{
  local link_path="${1:-/usr/bin/mlserver-python}"
  local python_link

  if [[ $(id -u) -ne 0 ]]; then
    echo "FATAL: Sudo or Root permissions are needed to unlink mlserver"
    return 1
  fi

  # -L catches dangling symlinks, which a plain -f test (it follows the link)
  # would skip and leave behind.
  if [[ -L "${link_path}" || -f "${link_path}" ]]; then
    python_link=$(readlink -f "${link_path}")
    echo "INFO: Removing ${link_path} => ${python_link}"
    rm -f -- "${link_path}" || return 1
  fi

  return 0
}

# Entry point: the first argument may select a link-maintenance action, in
# which case the script exits with that action's status. Anything else
# (including no arguments at all) is handed to the Python interpreter.
case "${1-}" in
  --mlserver-symlink)
    symlink_python
    exit $?
    ;;
  --mlserver-unsymlink)
    unsymlink_python
    exit $?
    ;;
esac

start_python "$@"
