#!/usr/bin/env bash

# Source common variables and functions.
# SCRIPT_DIR is resolved from the location of this file so the shared file is
# found regardless of the caller's working directory.
# Stop right away when the shared file cannot be read: every later step relies
# on the variables and helpers it defines, and continuing without them would
# run system-level commands with empty paths.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -z "${SCRIPT_DIR}" || ! -r "${SCRIPT_DIR}/layerops-install-common" ]]; then
  printf 'ERROR: cannot read %s\n' "${SCRIPT_DIR}/layerops-install-common" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/layerops-install-common"

# === Server-specific variables ===
# Space-separated names of the environment variables the "init" sub-command
# needs (presumably consumed by check_envvars from the common file — confirm).
REQUIRED_ENVVARS="ENVIRONMENT_GROUP_UUID SERVER_INSTANCE_TOKEN"
REQUIRED_ENVVARS+=" LAYEROPS_WORKER_URL LAYEROPS_ORCHESTRATOR_URL"
REQUIRED_ENVVARS+=" SPIRE_URL WIREGUARD_PRIVATE_IP LAYEROPS_API_URL"
# Absolute path of systemctl, used when stopping services in _clean.
SYSTEMCTL_BIN_PATH="/usr/bin/systemctl"

# SPIRE server specific paths and ports.
# The *_DIR variables used on the right-hand side are not defined in this file;
# they are expected to come from the sourced common file.
SPIRE_SERVER_CONFIG_FILE="${SPIRE_CONFIG_DIR}/server.conf"
SPIRE_SERVER_DATA_DIR="${LAYEROPS_DATA_DIR}/spire/server"
SPIRE_SERVER_PATH="${LAYEROPS_BIN_DIR}/spire-server"
SPIRE_SERVER_LAUNCH_SCRIPT="${LAYEROPS_BIN_DIR}/start-spire-server"
SPIRE_BUNDLE_TRUST_PORT="8082"
SPIRE_SERVER_SYSTEMD_FILE="/etc/systemd/system/spire-server.service"
SPIRE_TRUST_BUNDLE_PATH="${LAYEROPS_DATA_DIR}/spire/trust-bundle.pem"

# Logs backup and restore (victorialogs snapshots + vector server configuration)

# rclone configuration used to reach the S3 bucket
RCLONE_CONFIG_DIR="${LAYEROPS_ETC_DIR}/rclone"
RCLONE_CONFIG_FILE="${RCLONE_CONFIG_DIR}/rclone.conf"
VICTORIALOGS_RCLONE_PATH_PREFIX="layerops/${ENVIRONMENT_GROUP_UUID}/victorialogs/snapshots"
LOGS_S3_BUCKET="s3-layerops"

# Victorialogs container: its name embeds the first 8 characters of the
# environment group UUID twice
VICTORIALOGS_CONTAINER_ID="${ENVIRONMENT_GROUP_UUID:0:8}-monitoring-victorialogs-${ENVIRONMENT_GROUP_UUID:0:8}"
VICTORIALOGS_CONTAINER_PORT="9428"
VICTORIALOGS_DATA_DIR="${LAYEROPS_DATA_DIR}/volumes/${VICTORIALOGS_CONTAINER_ID}/data"

# Generated snapshot/restore scripts and the systemd units scheduling snapshots
VICTORIALOGS_SNAPSHOT_SCRIPT="${LAYEROPS_BIN_DIR}/victorialogs-snapshot"
VICTORIALOGS_RESTORE_SCRIPT="${LAYEROPS_BIN_DIR}/victorialogs-restore"
VICTORIALOGS_SNAPSHOT_SYSTEMD_FILE="/etc/systemd/system/victorialogs-snapshot.service"
VICTORIALOGS_SNAPSHOT_SYSTEMD_TIMER_FILE="/etc/systemd/system/victorialogs-snapshot.timer"
VICTORIALOGS_SNAPSHOT_FREQUENCY="1hour"

# Vector server container: restore directory on the host and S3 prefix of the
# application logs
VECTOR_SERVER_CONTAINER_ID="${ENVIRONMENT_GROUP_UUID:0:8}-monitoring-vector-server-${ENVIRONMENT_GROUP_UUID:0:8}"
VECTOR_SERVER_RESTORE_DIR="${LAYEROPS_DATA_DIR}/volumes/${VECTOR_SERVER_CONTAINER_ID}/backups"
VECTOR_SERVER_S3_PATH_PREFIX="layerops/${ENVIRONMENT_GROUP_UUID}/applogs/_all"

# Print the command-line synopsis on stdout and terminate the script with
# status 0.
function usage {
  local script_name
  script_name=$(basename "${BASH_SOURCE[0]}")
  printf 'Usage: %s [init|clean]\n' "$script_name"
  exit 0
}

# Setup Victoria Logs S3 backups.
#
# Creates $RCLONE_CONFIG_DIR (mode 750) and generates four files from the
# variables defined at the top of this script:
#   - $VICTORIALOGS_SNAPSHOT_SCRIPT: asks the Victorialogs container for a
#     partition snapshot, streams it to S3 through rclone and records the
#     backup time in the ".latest" object;
#   - $VICTORIALOGS_SNAPSHOT_SYSTEMD_FILE / ..._TIMER_FILE: systemd service and
#     timer running the snapshot script every $VICTORIALOGS_SNAPSHOT_FREQUENCY;
#   - $VICTORIALOGS_RESTORE_SCRIPT: when the data directory has no partitions,
#     restores the snapshots from S3 and then copies the Vector log files
#     written since ".latest" into $VECTOR_SERVER_RESTORE_DIR.
# Both scripts are made executable (mode 755).
#
# The heredocs are intentionally unquoted: plain $VAR references are expanded
# now (install time), while escaped \$VAR references are kept for run time.
function _setup_victorialogs_backup() {
  mkdir -p "$RCLONE_CONFIG_DIR"
  chmod 750 "$RCLONE_CONFIG_DIR"

  cat <<EOF > "$VICTORIALOGS_SNAPSHOT_SCRIPT"
#!/bin/bash

if ! grep -q "\[${LOGS_S3_BUCKET}\]" "$RCLONE_CONFIG_FILE"
then
  echo "[VICTORIALOGS SNAPSHOT] S3 backend is not configured yet => nothing to do"
  exit 0
fi

# Get Victorialogs container address
VICTORIALOGS_CONTAINER_IP=\$(docker inspect $VICTORIALOGS_CONTAINER_ID | jq -r '.[].NetworkSettings.Networks | to_entries[0].value.IPAddress' 2> /dev/null)
if [ -z "\$VICTORIALOGS_CONTAINER_IP" ]
then
  echo "[VICTORIALOGS SNAPSHOT] Victorialogs container is not running => nothing to do"
  exit 0
fi

VICTORIALOGS_SNAPSHOT_URL="http://\${VICTORIALOGS_CONTAINER_IP}:$VICTORIALOGS_CONTAINER_PORT/internal/partition/snapshot"

# Create the snapshot named by the first argument, stream its content to S3 as
# a zstd-compressed tarball, then remove the local snapshot directory.
# Returns 2 when no snapshot directory was returned by Victorialogs and a
# non-zero status when the archive could not be built or uploaded.
backup_snapshot() {
  local snapname="\$1"
  local snapdir rc
  snapdir=\$(curl -fsS "\${VICTORIALOGS_SNAPSHOT_URL}/create?name=\${snapname}" | jq -r .)
  [ -n "\$snapdir" ] || return 2
  (
    # Run in a subshell: the working directory of the script is left untouched
    # and a failure of any stage of the pipeline is reported.
    set -o pipefail
    cd "\$snapdir" || exit 1
    ls | tar -I zstd -cf - --files-from=/dev/stdin | rclone --config "$RCLONE_CONFIG_FILE" rcat --s3-chunk-size 100M "${LOGS_S3_BUCKET}:/${VICTORIALOGS_RCLONE_PATH_PREFIX}/\${snapname}.tar.zst"
  )
  rc=\$?
  rm -fR "\$snapdir"
  return \$rc
}

# If we are in the first hour of the day => backup a snapshot of last day
if [ "\$(date +%H)" -eq 0 ]
then
  backup_snapshot "\$(date -d yesterday +%Y%m%d)" || echo "[VICTORIALOGS SNAPSHOT] Could not back up the snapshot of the previous day" >&2
fi

# backup a snapshot of current day; stop here on failure so that .latest is
# only moved forward after a successful upload
backup_snapshot "\$(date +%Y%m%d)" || exit 1

# Force UTC time for .latest tag because vector.dev uploaded files are set in UTC (cannot be overridden in Vector.dev. cf. https://vector.dev/docs/architecture/data-model/log/#timestamps)
date -u -Iseconds | rclone --config "$RCLONE_CONFIG_FILE" rcat "${LOGS_S3_BUCKET}:/${VICTORIALOGS_RCLONE_PATH_PREFIX}/.latest"

EOF

  cat <<EOF > "$VICTORIALOGS_SNAPSHOT_SYSTEMD_FILE"
[Unit]
Description=Run Victorialogs snapshot

[Service]
Type=oneshot
ExecStart=$VICTORIALOGS_SNAPSHOT_SCRIPT
EnvironmentFile=-$LAYEROPS_ETC_DEFAULT_FILE
EOF

  cat <<EOF > "$VICTORIALOGS_SNAPSHOT_SYSTEMD_TIMER_FILE"
[Unit]
Description=Schedule Victorialogs snapshots

[Timer]
OnBootSec=$VICTORIALOGS_SNAPSHOT_FREQUENCY
OnUnitActiveSec=$VICTORIALOGS_SNAPSHOT_FREQUENCY

[Install]
WantedBy=timers.target
EOF

  cat <<EOF > "$VICTORIALOGS_RESTORE_SCRIPT"
#!/bin/bash

if ! grep -q "\[${LOGS_S3_BUCKET}\]" "$RCLONE_CONFIG_FILE"
then
  echo "[VICTORIALOGS RESTORE] S3 backend is not configured yet => nothing to restore"
  exit 0
fi

if [ "\$(docker inspect --format '{{.State.Status}}' $VICTORIALOGS_CONTAINER_ID 2> /dev/null)" == "running" ]
then
  echo "[VICTORIALOGS RESTORE] Victorialogs container is already running => nothing to restore"
  exit 0
fi

if ls "$VICTORIALOGS_DATA_DIR/partitions" > /dev/null 2>&1
then
  echo "[VICTORIALOGS RESTORE] Victorialogs data is not empty => nothing to restore"
  exit 0
fi

# If data is empty => try to restore snapshots
for SNAPNAME in \$(rclone --config "$RCLONE_CONFIG_FILE" ls "${LOGS_S3_BUCKET}:/${VICTORIALOGS_RCLONE_PATH_PREFIX}" 2> /dev/null | awk '/\.tar\.zst\$/ {print \$2}')
do
  PARTITION_DIR="$VICTORIALOGS_DATA_DIR/partitions/\${SNAPNAME%.tar.zst}"
  mkdir -p "\$PARTITION_DIR"
  (
    # Never extract anywhere else than in the partition directory
    cd "\$PARTITION_DIR" || exit 1
    rclone --config "$RCLONE_CONFIG_FILE" cat "${LOGS_S3_BUCKET}:/${VICTORIALOGS_RCLONE_PATH_PREFIX}/\${SNAPNAME}" | tar -I zstd -xf -
  )
done

# Get the fresh logs from S3 (TODO: run this in parallel so that a large volume of logs to restore does not block the process)
latest=\$(rclone --config "$RCLONE_CONFIG_FILE" cat "${LOGS_S3_BUCKET}:/${VICTORIALOGS_RCLONE_PATH_PREFIX}/.latest")
[ -z "\$latest" ] && latest=\$(date -d "1 hour ago" -Iseconds)
now=\$(date -Iseconds)
mkdir -p "$VECTOR_SERVER_RESTORE_DIR"
chown $LAYEROPS_USER:$LAYEROPS_GROUP "$VECTOR_SERVER_RESTORE_DIR"
# Walk hour by hour, in UTC, from the hour of the last backup up to and
# including the current hour: the date directory and the file name prefix of
# the uploaded log files are both built from the UTC time.
while [ "\$(date -u -d "\$latest" +%Y%m%d%H)" -le "\$(date -u -d "\$now" +%Y%m%d%H)" ]
do
  rclone --config "$RCLONE_CONFIG_FILE" copy "${LOGS_S3_BUCKET}:/${VECTOR_SERVER_S3_PATH_PREFIX}/\$(date -u -d "\$latest" +%Y/%m/%d)/" --include "layerops_\$(date -u -d "\$latest" +%F-%H)-*" "$VECTOR_SERVER_RESTORE_DIR/"
  latest=\$(date -d "\$latest + 1 hour" -Iseconds)
done

EOF

  chmod 755 "$VICTORIALOGS_SNAPSHOT_SCRIPT" "$VICTORIALOGS_RESTORE_SCRIPT"
}

# Install SPIRE server and agent binaries.
#
# Downloads the release tarball from $SPIRE_URL with curl_with_retry, unpacks
# it in a private scratch directory and moves bin/spire-server and
# bin/spire-agent to $SPIRE_SERVER_PATH and $SPIRE_AGENT_PATH (mode 755).
# Exits the script with status 1 when the download, the extraction or the
# installation of either binary fails.
function _install_spire_binaries() {
  local scratch_dir
  local status=0

  # This function has always switched to /tmp and stayed there; the side effect
  # is kept in case later install steps depend on the working directory.
  cd /tmp || exit 1

  # Unpack in a fresh directory instead of /tmp itself: a leftover /tmp/spire*
  # entry would otherwise match the glob below and make mv fail.
  scratch_dir=$(mktemp -d /tmp/spire-install.XXXXXX) || exit 1

  if ! curl_with_retry -s -N -L "$SPIRE_URL" | tar xz -C "$scratch_dir"; then
    rm -rf -- "$scratch_dir"
    exit 1
  fi

  mv -- "$scratch_dir"/spire*/bin/spire-server "$SPIRE_SERVER_PATH" || status=1
  mv -- "$scratch_dir"/spire*/bin/spire-agent "$SPIRE_AGENT_PATH" || status=1
  chmod 755 "$SPIRE_SERVER_PATH" "$SPIRE_AGENT_PATH" || status=1

  # The extracted tree is no longer needed once the binaries are in place
  rm -rf -- "$scratch_dir"
  [ "$status" -eq 0 ] || exit 1
}

# Configure SPIRE server.
# Renders the server configuration into $SPIRE_SERVER_CONFIG_FILE: listening
# address/port, trust domain, data directory, sqlite3 datastore, on-disk key
# manager and the "http_challenge" node attestor. Shell variables are
# substituted while the file is written (unquoted heredoc).
function _configure_spire_server() {
  local target=$SPIRE_SERVER_CONFIG_FILE

  cat <<EOF > "$target"
server {
    bind_address = "$WIREGUARD_PRIVATE_IP"
    bind_port = "$SPIRE_BIND_PORT"
    trust_domain = "$SPIRE_TRUST_DOMAIN"
    data_dir = "$SPIRE_SERVER_DATA_DIR"
    log_level = "INFO"
    ca_ttl = "24h"
    default_x509_svid_ttl = "1h"
    disable_jwt_svids = true
}

plugins {
    DataStore "sql" {
        plugin_data {
            database_type = "sqlite3"
            connection_string = "$SPIRE_SERVER_DATA_DIR/datastore.sqlite3"
        }
    }

    KeyManager "disk" {
        plugin_data {
            keys_path = "$SPIRE_SERVER_DATA_DIR/keys.json"
        }
    }

    NodeAttestor "http_challenge" {
        plugin_data {
            allowed_dns_patterns = ["172\\\.24\\\.[0-9]+\\\.[0-9]+"]
            required_port = $SPIRE_HTTP_CHALLENGE_PORT
            tofu = false
        }
    }
}
EOF
}

# Configure SPIRE agent for server (uses trust_bundle_path instead of URL).
# Renders the agent configuration into $SPIRE_AGENT_CONFIG_FILE: data
# directory, trust domain, address/port of the SPIRE server, local trust
# bundle, on-disk key manager, "http_challenge" node attestor and the unix and
# docker workload attestors. Shell variables are substituted while the file is
# written (unquoted heredoc).
function _configure_spire_agent_server() {
  local target=$SPIRE_AGENT_CONFIG_FILE

  cat <<EOF > "$target"
agent {
    data_dir = "$SPIRE_AGENT_DATA_DIR"
    log_level = "DEBUG"
    trust_domain = "$SPIRE_TRUST_DOMAIN"
    server_address = "$SPIRE_SERVER_IP"
    server_port = $SPIRE_BIND_PORT
    trust_bundle_path = "$SPIRE_TRUST_BUNDLE_PATH"
    rebootstrap_mode = "auto"
}

plugins {
   KeyManager "disk" {
        plugin_data {
            directory = "$SPIRE_AGENT_DATA_DIR"
        }
    }

    NodeAttestor "http_challenge" {
        plugin_data {
            hostname = "$WIREGUARD_PRIVATE_IP"
            port = $SPIRE_HTTP_CHALLENGE_PORT
        }
    }

    WorkloadAttestor "unix" {
        plugin_data {}
    }
    WorkloadAttestor "docker" {
        plugin_data {}
    }
}
EOF
}

# Create SPIRE server systemd service.
# Writes the unit file $SPIRE_SERVER_SYSTEMD_FILE, which runs the SPIRE server
# as $LAYEROPS_USER:$LAYEROPS_GROUP after the network and the WireGuard init
# service, then writes the executable launcher $SPIRE_SERVER_LAUNCH_SCRIPT that
# enables and starts the unit.
# NOTE(review): the unit stops the server with SIGKILL (kill -9); confirm that
# skipping a graceful shutdown is intended.
function _create_spire_server_systemd() {
  local unit_file=$SPIRE_SERVER_SYSTEMD_FILE
  local launcher=$SPIRE_SERVER_LAUNCH_SCRIPT

  # \$MAINPID is escaped so that systemd, not this script, resolves it
  cat <<EOF > "$unit_file"
[Unit]
Description=Spire Server
Requires=network-online.target layerops-wireguard-init.service
After=network-online.target layerops-wireguard-init.service

[Service]
User=$LAYEROPS_USER
Group=$LAYEROPS_GROUP

WorkingDirectory=$LAYEROPS_HOME_DIR
TimeoutStartSec=0
ExecStart=$SPIRE_SERVER_PATH run -config $SPIRE_SERVER_CONFIG_FILE
ExecStop=/bin/kill -9 \$MAINPID

Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
EOF

  printf '%s\n' '#!/bin/bash' 'systemctl enable --now spire-server' > "$launcher"
  chmod 755 "$launcher"
}

# Create SPIRE agent systemd service for server.
# Writes the unit file $SPIRE_AGENT_SYSTEMD_FILE, which runs the SPIRE agent as
# $LAYEROPS_USER:$LAYEROPS_GROUP after the network and the WireGuard init
# service, then writes the executable launcher $SPIRE_AGENT_LAUNCH_SCRIPT that
# enables and starts the unit. Before each start the unit grants the
# cap_net_bind_service capability to the agent binary with setcap.
function _create_spire_agent_systemd_server() {
  local unit_file=$SPIRE_AGENT_SYSTEMD_FILE
  local launcher=$SPIRE_AGENT_LAUNCH_SCRIPT

  # \$MAINPID is escaped so that systemd, not this script, resolves it
  cat <<EOF > "$unit_file"
[Unit]
Description=Spire Agent
Requires=network-online.target layerops-wireguard-init.service
After=network-online.target layerops-wireguard-init.service

[Service]
User=$LAYEROPS_USER
Group=$LAYEROPS_GROUP

TimeoutStartSec=0
ExecStartPre=+setcap 'cap_net_bind_service=+ep' $SPIRE_AGENT_PATH
ExecStart=$SPIRE_AGENT_PATH run -config $SPIRE_AGENT_CONFIG_FILE
ExecStop=/bin/kill -9 \$MAINPID

Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
EOF

  printf '%s\n' '#!/bin/bash' 'systemctl enable --now spire-agent' > "$launcher"
  chmod 755 "$launcher"
}

# Create server-specific config file.
# Overwrites $LAYEROPS_ETC_DEFAULT_FILE with one KEY=value line per setting, in
# a fixed order. Most keys mirror the shell variable of the same name; API_URL,
# API_ORCHESTRATOR, INSTANCE_UUID and SIGNATURE are filled from differently
# named variables, and ROLE and STATS are constants.
function _create_config_file() {
  local target=$LAYEROPS_ETC_DEFAULT_FILE

  printf '%s\n' \
    "WIREGUARD_RELOAD_PATH=$WIREGUARD_RELOAD_PATH" \
    "WIREGUARD_PRIVATE_IP=$WIREGUARD_PRIVATE_IP" \
    "WIREGUARD_PRIVATE_KEY=$WIREGUARD_PRIVATE_KEY" \
    "WIREGUARD_PUBLIC_KEY=$WIREGUARD_PUBLIC_KEY" \
    "WIREGUARD_MTU=$WIREGUARD_MTU" \
    "WIREGUARD_SUBNET=$WIREGUARD_SUBNET" \
    "SPIRE_SERVER_LAUNCH_SCRIPT=$SPIRE_SERVER_LAUNCH_SCRIPT" \
    "SPIRE_AGENT_LAUNCH_SCRIPT=$SPIRE_AGENT_LAUNCH_SCRIPT" \
    "SPIRE_TRUST_DOMAIN=$SPIRE_TRUST_DOMAIN" \
    "LAYEROPS_DATA_DIR=$LAYEROPS_DATA_DIR" \
    "STATS_PORT=$STATS_PORT" \
    "STATS_CACHE_TTL=$STATS_CACHE_TTL" \
    "API_URL=$LAYEROPS_API_URL" \
    "LAYEROPS_WORKER_URL=$LAYEROPS_WORKER_URL" \
    "LAYEROPS_WORKER_VERSION_CHECK=$LAYEROPS_WORKER_VERSION_CHECK" \
    "LAYEROPS_WORKER_SIGNATURE_PUBLIC_KEY=$LAYEROPS_WORKER_SIGNATURE_PUBLIC_KEY" \
    "ROLE=server" \
    "API_ORCHESTRATOR=$LAYEROPS_ORCHESTRATOR_URL" \
    "ENVIRONMENT_GROUP_UUID=$ENVIRONMENT_GROUP_UUID" \
    "INSTANCE_UUID=$ENVIRONMENT_GROUP_UUID" \
    "SIGNATURE=$SERVER_INSTANCE_SIGNATURE" \
    "STATS=true" \
    > "$target"
}

# Provision this host as a LayerOps server ("init" sub-command).
#
# Steps, in order: install packages, register the instance with the
# orchestrator, create the layerops user and the mandatory directories, install
# Docker and Node Exporter, generate the Victorialogs backup tooling, write the
# sudoers rules, set up WireGuard, install and configure SPIRE (server and
# agent), install the worker, write the worker configuration, fix ownership and
# finally enable and start the services.
#
# Exits with status 1 when the registration request to the orchestrator fails.
# Helpers and variables that are not defined in this file (install_base_packages,
# curl_with_retry, install_docker, init_wireguard_base, $SUDO_FILE, ...) are
# expected to be provided by the sourced layerops-install-common file.
function _init() {
  # Install required packages
  install_base_packages curl coreutils gnupg iptables jq rclone rsyslog uuid-runtime wireguard wireguard-tools zstd

  # Disable systemd-resolved (the package is purged, not merely stopped)
  dpkg --purge systemd-resolved

  # Set instance signature: a random UUID sent to the orchestrator below and
  # stored as SIGNATURE by _create_config_file
  SERVER_INSTANCE_SIGNATURE=$(uuidgen)
  # JSON body of the registration request; for a server the instance UUID is the
  # environment group UUID
  INSTANCE_INIT_REQUEST=$(cat << EOF
{
  "instanceUuid": "${ENVIRONMENT_GROUP_UUID}",
  "instanceAccessToken": "${SERVER_INSTANCE_TOKEN}",
  "signature": "${SERVER_INSTANCE_SIGNATURE}",
  "environmentGroupUuid": "${ENVIRONMENT_GROUP_UUID}"
}
EOF
)
  # Register the instance; abort the whole installation when the call fails
  curl_with_retry --fail-with-body -A $CURL_USER_AGENT -X POST -H "Content-Type: application/json" -d "$INSTANCE_INIT_REQUEST" ${LAYEROPS_ORCHESTRATOR_URL}/instances/init
  [ "$?" -ne "0" ] && exit 1

  # Create layerops user (without additional SSH keys for server); the user is
  # only added when it does not exist yet
  id $LAYEROPS_USER > /dev/null 2>&1 || adduser -q --gecos "" --disabled-password --home $LAYEROPS_HOME_DIR $LAYEROPS_USER
  mkdir -p $LAYEROPS_HOME_DIR/.ssh
  touch $LAYEROPS_HOME_DIR/.ssh/authorized_keys

  # Create mandatory folders (including server-specific ones)
  mkdir -p \
    $WIREGUARD_CONFIG_DIR \
    $LAYEROPS_ETC_DIR \
    $LAYEROPS_BIN_DIR \
    $DOCKER_DATA_DIR \
    $SPIRE_CONFIG_DIR \
    $SPIRE_SERVER_DATA_DIR \
    $SPIRE_AGENT_DATA_DIR
  # Make sure the configuration files exist before they are filled in later
  touch \
    $LAYEROPS_ETC_DEFAULT_FILE \
    $SPIRE_AGENT_CONFIG_FILE \
    $SPIRE_SERVER_CONFIG_FILE
  # Configuration directories are only accessible to their owner
  chmod 700 $WIREGUARD_CONFIG_DIR $LAYEROPS_ETC_DIR $SPIRE_CONFIG_DIR

  # Install Docker
  install_docker

  # Install Node Exporter
  install_node_exporter

  # Setup Victoria Logs S3 Backups
  _setup_victorialogs_backup

  # Set sudo access to layerops user: members of the layerops group may run the
  # WireGuard tools, the SPIRE launch scripts and the Victorialogs restore
  # script as root without a password.
  # NOTE(review): this rule uses WIREGUARD_QUICK_RELOAD_PATH while
  # _create_config_file exports WIREGUARD_RELOAD_PATH; both come from outside
  # this file — confirm they are two distinct, defined variables.
  cat > $SUDO_FILE <<EOF
# Group rules for layerops
%layerops ALL=(ALL) NOPASSWD: /usr/bin/wg-quick, /usr/bin/wg, $WIREGUARD_QUICK_RELOAD_PATH
%layerops ALL=(ALL) NOPASSWD: $SPIRE_SERVER_LAUNCH_SCRIPT
%layerops ALL=(ALL) NOPASSWD: $SPIRE_AGENT_LAUNCH_SCRIPT
%layerops ALL=(ALL) NOPASSWD: $VICTORIALOGS_RESTORE_SCRIPT
EOF
# Sudoers rules that are currently disabled (kept for reference):
# %layerops ALL=(ALL) NOPASSWD: $VICTORIALOGS_RELOAD_SCRIPT
# %layerops ALL=(ALL) NOPASSWD: $VECTOR_RELOAD_SCRIPT
# %layerops ALL=(ALL) NOPASSWD: $MIMIR_RELOAD_SCRIPT
# %layerops ALL=(ALL) NOPASSWD: $PROMETHEUS_RELOAD_SCRIPT

  # Enable ip forwarding (for wireguard gateway)
  iptables -P FORWARD ACCEPT
  enable_ip_forwarding

  # Init Wireguard
  init_wireguard_base
  _launch_wireguard_ecmp_service

  # Install SPIRE server and agent
  _install_spire_binaries
  _configure_spire_server
  _configure_spire_agent_server
  _create_spire_server_systemd
  _create_spire_agent_systemd_server

  # Install layerops worker
  install_layerops_worker

  # Setup layerops config
  _create_config_file

  # Create worker systemd services
  create_worker_systemd_services

  # Set ownership for /opt/layerops directory; the Docker data directory stays
  # owned by root
  chown -R $LAYEROPS_USER:$LAYEROPS_GROUP $LAYEROPS_HOME_DIR $LAYEROPS_DATA_DIR $WIREGUARD_CONFIG_DIR $LAYEROPS_ETC_DIR $SPIRE_CONFIG_DIR
  chown root:root $DOCKER_DATA_DIR

  # Enable and start layerops worker, together with the WireGuard services, the
  # worker update timer, the Victorialogs snapshot timer and Node Exporter.
  # REMOTE_VERSION is not set in this file (presumably by
  # install_layerops_worker — confirm).
  systemctl daemon-reload
  systemctl enable --now \
    layerops-wireguard-init \
    layerops-wireguard-ecmp \
    layerops-worker@${REMOTE_VERSION}.service \
    layerops-worker-update.timer \
    victorialogs-snapshot.timer \
    node_exporter
  systemctl restart docker
}


# Remove the LayerOps server installation ("clean" sub-command).
#
# Disables and stops the services set up by this script, deletes their systemd
# unit files (including the Victorialogs snapshot service and timer generated
# by _setup_victorialogs_backup), reloads systemd and finally delegates the
# rest of the cleanup to clean_layerops_base.
function _clean() {
  # Stop services
  # NOTE(review): REMOTE_VERSION is not set anywhere in this file; confirm it is
  # available when only "clean" is run, otherwise the worker unit name is
  # incomplete.
  $SYSTEMCTL_BIN_PATH disable --now \
    "layerops-worker@${REMOTE_VERSION}" \
    layerops-worker-update.timer \
    victorialogs-snapshot.timer \
    spire-agent \
    spire-server \
    node_exporter \
    layerops-wireguard-ecmp \
    layerops-wireguard-init

  # Remove the unit files; rm -f keeps this step idempotent when some of them
  # are already gone
  rm -f \
    "$LAYEROPS_WORKER_SYSTEMD_FILE" \
    "$LAYEROPS_WIREGUARD_INIT_SYSTEMD_FILE" \
    "$WIREGUARD_ECMP_SYSTEMD_UNIT" \
    "$SPIRE_AGENT_SYSTEMD_FILE" \
    "$SPIRE_SERVER_SYSTEMD_FILE" \
    "$VICTORIALOGS_SNAPSHOT_SYSTEMD_FILE" \
    "$VICTORIALOGS_SNAPSHOT_SYSTEMD_TIMER_FILE"
  $SYSTEMCTL_BIN_PATH daemon-reload

  # Clean base layerops installation
  clean_layerops_base
}

# Entry point: a sub-command is mandatory, otherwise the usage text is shown
# (usage terminates the script).
(( $# > 0 )) || usage

# Dispatch on the sub-command; anything other than init/clean shows the usage.
case "$1" in
  init)
    # The required environment variables are only checked for "init"
    check_envvars
    _init
    ;;
  clean)
    _clean
    ;;
  *)
    usage
    ;;
esac
