#!/usr/bin/zsh
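#If you replace the shell above with bash, note also the shell-specific branches (the tests on $ZSH_NAME / $BASH) in the transcript clean-up code near the end of this file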
# Copyright (c) 2024 Quantius Benignus
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#--------------------------------------------------------------------------

# NAME: wsi 
# PREREQUISITES: 
#  - whisper.cpp installation (see https://github.com/ggerganov/whisper.cpp) ,
#  - OR a whisperfile executable (no installation needed, see https://huggingface.co/Mozilla/whisperfile)
#  - recent versions of 'sox', 'curl', 'jq', 'xsel' or 'wl-copy' CLI tools from your system's repositories.
#--------------------------------------------------------------------------

# Session type, taken from XDG_SESSION_TYPE. The output stage of this script compares it
# against "x11" and "wayland" to choose the clipboard tool.
# The commented 2nd line may catch edge cases where XDG_SESSION_TYPE is not set:
wm="$XDG_SESSION_TYPE"
#wm="${XDG_SESSION_TYPE:-$(loginctl show-session $(loginctl | grep $(whoami) | awk '{print $1}') -p Type --value)}"

#---USER CONFIGURATION BLOCK----------------------------------------------------------------------
#Please, adjust the variables here to suit your environment:
# Store temp files in memory for speed and to reduce SSD/HDD "grinding":
TEMPD='/dev/shm'
# Default whisper.cpp model file for inference.
# NOTE: the LAST uncommented assignment wins; the placeholder path is overridden by the line after it.
#model="$TEMPD/ggml-base.en.bin"
model="$HOME/CHANGE_PATH_TO/WHISPER_CPP/MODELS/HERE/ggml-base.en.bin"
model=/usr/share/whisper/models/ggml-base.en.bin

# Uncomment to use an available (in the PATH) whisperfile (WITH BUILT-IN MODEL) instead of standalone whisper.cpp or a whisper.cpp server.
# This mode has precedence over a whisper.cpp installation, so uncommenting it will make blurt default to the whisperfile.
# The advantage of the whisperfile is that no installation is needed, the model is built-in, just `chmod +x whisperfile...` and it is ready to use.
#WHISPERFILE="whisper-tiny.en.llamafile" #this and others available from https://huggingface.co/Mozilla/whisperfile/tree/main

# Hardcoded temp wav file to store the voice memo; it gets overwritten every time (in RAM):
ramf="$TEMPD/wfile-$USER"
# Set the number of processing threads for whisper.cpp inference (adjust for your case):
#NTHR=8
# It seems that the optimum number of transcribe threads should equal half the CPU processing cores:
NTHR=$(( $(getconf _NPROCESSORS_ONLN) / 2 ))
# Integer division yields 0 on a single-core machine; always request at least one thread:
(( NTHR > 0 )) || NTHR=1
# Use PRIMARY selection (middle mouse button to paste) unless CLIPBOARD selected from GUI:
CLIPBOARD=false
# Hardcoded whisper.cpp server hostname and port. Used when invoked with the -n option from the CLI
# (or from Blurt with any string starting with '-' in the hostname field):
WHOST="127.0.0.1"
WPORT="58080"
# Can be overridden from Blurt when the command line argument IP:PORT is supplied instead of '-n'
#---END USER CONFIG BLOCK------------------------------------------------------------------------

# NOTE: `if ! :` is always false, so the tool checks down to the matching `fi` are skipped as the file stands.
# Change the guard to `if : ; then` to run them (e.g. on first use or after moving to a new machine).
if ! : ; then
#---CHECK DEPENDENCIES. This block can be commented out once dependencies confirmed-----------------------------------
command -v curl &>/dev/null || { echo "curl is required. Please, install curl" >&2 ; exit 1 ; }
#The next is only needed if curl requests json output from the whisper.cpp server;
#command -v jq &>/dev/null || { echo "jq is required. Please, install jq" >&2 ; exit 1 ; }
command -v sox &>/dev/null || { echo "sox is required. Please, install sox" >&2 ; exit 1 ; }
# Quoted, so an unset or empty WHISPERFILE counts as "not found" and the whisper.cpp check below runs:
command -v "$WHISPERFILE" &>/dev/null || {
    echo "No whisperfile found, get one from https://huggingface.co/Mozilla/whisperfile. Checking for whisper.cpp...   "
    # whisper-cli is the executable this script invokes for local inference, so that is what must be in the PATH:
    command -v whisper-cli &>/dev/null || {
        printf '%s\n' \
            "Please, install whisper.cpp (see https://github.com/ggerganov/whisper.cpp)" \
            "and make its 'whisper-cli' executable available in your PATH, e.g." \
            "  'ln -s /full/path/to/whisper-cli \$HOME/.local/bin/whisper-cli'" >&2
        exit 1
    }
}
#Now let's check if we are in X11 or Wayland and use the right utility.
#The session type is compared by value ("$wm"), using the same lowercase names as the output stage of this script:
if [[ "$wm" == "wayland" ]]; then
    command -v wl-copy &>/dev/null || { echo "wl-copy is needed for the clipboard. Please, install wl-copy" >&2 ; exit 1 ; }
elif [[ "$wm" == "x11" ]]; then
    command -v xsel &>/dev/null || { echo "We rely on xsel for the clipboard. Please, install xsel." >&2 ; exit 1 ; }
fi
fi
#This is needed if wsi is used with Blurt (GNOME shell extension).
#It sits outside the guard above, so it runs on every invocation and stops the script when the Blurt settings key cannot be read:
gsettings get org.gnome.shell.extensions.blurt use-api &> /dev/null || { echo "This \
 script is made to work with Blurt (GNOME shell extension). Please, install it from https://extensions.gnome.org/extension/6742/blurt/"; exit 1 ; }
#---END CHECK DEPENDENCIES. The above block can be commented out after successful 1st run----------------------------

#Hear the complaints of the above tools and do not continue with the sequence:
set -e

# Succeed (status 0) only when an HTTP request to the given address ($1, host:port) answers with status 200.
# curl runs silently (-s), discards the body (-o /dev/null) and prints only the status code (-w).
server_is_up() {
    [[ "$(curl -s -f -o /dev/null -w '%{http_code}' "$1")" == "200" ]]
}

# Process command line arguments:
#   -c|--clipboard  copy to CLIPBOARD instead of the PRIMARY selection
#   -n|--netapi     use the whisper.cpp server at WHOST:WPORT from the config block
#   -h|--help       print usage and exit
#   anything else   is taken as the IP:PORT of a whisper.cpp server
# Each server address is probed as soon as it is seen; the script stops if the probe fails.
while [ $# -gt 0 ]; do
    case "$1" in
        -c|--clipboard)
            CLIPBOARD=true
            shift
            ;;
        -n|--netapi)
            #This uses the hostname or IP and port specified in the config block (when run from CLI or set in Blurt with '-something')
            #Can be overwritten from Blurt, supplied as command line flag IP:PORT instead of this option
            IPnPORT="$WHOST:$WPORT"
            if ! server_is_up "$IPnPORT"; then
                echo "Can't connect to whisper.cpp server at provided address!" >&2
                exit 1
            fi
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [-c|--clipboard] [-n|--netapi]"
            echo "  -c, --clipboard: Use clipboard instead of PRIMARY selection"
            echo "  -n, --netapi: Use whisper.cpp server with the host:port in the CONFIG block"
            exit 0
            ;;
        *)
            #The network address and port should have already been sanitized in extension
            IPnPORT="$1"
            if ! server_is_up "$IPnPORT"; then
                echo "Can't connect to a whisper.cpp server at provided address!" >&2
                exit 1
            fi
            shift
            ;;
    esac
done

#trap "" SIGINT
# Record the voice memo into the RAM-backed wav file at 16 kHz. The SoX `silence` effect arguments
# start the recording once the input exceeds 1% for 0.1 s and end it after 1.0 s below 1%.
# All expansions below are quoted so paths or addresses with spaces also work if the shebang is switched to bash.
rec -q -t wav "$ramf" rate 16k silence 1 0.1 1% 1 1.0 1%  2>/dev/null

# Transcribe with the first available backend, in this order:
#   1. whisper.cpp server, when an address was selected on the command line (IPnPORT is set),
#   2. a whisperfile found in the PATH,
#   3. the local whisper-cli executable with the configured model.
if [ -n "$IPnPORT" ]; then
    # Plain-text response requested, so no JSON parsing is needed.
    str=$(curl -S -s "$IPnPORT/inference" \
        -H "Content-Type: multipart/form-data" \
        -F file="@$ramf" \
        -F temperature="0.0" \
        -F temperature_inc="0.2" \
        -F response_format="text")
# | jq -r '.text' )
elif command -v "$WHISPERFILE" &>/dev/null ; then
    str="$("$WHISPERFILE" -t "$NTHR" -nt -f "$ramf" 2>/dev/null)"
    #One can still use the standalone model with the whisperfile, if so desired:
    #str="$("$WHISPERFILE" -t "$NTHR" -nt -m "$model" -f "$ramf" 2>/dev/null)"
else
    str="$(whisper-cli -t "$NTHR" -nt -m "$model" -f "$ramf" 2>/dev/null)"
fi
# Whisper marks detected non-speech events with annotations such as (wind blowing) or a bracketed [tag].
# strip_enclosed TEXT OPEN CLOSE
#   Print TEXT with every OPEN...CLOSE span removed. Spans are removed one pair at a time
#   (first OPEN up to the next CLOSE), so speech between two annotations is kept.
#   Text without an OPEN followed by a CLOSE is printed unchanged.
strip_enclosed() {
    local text="$1" open="$2" close="$3" rest
    # Quoted "$open"/"$close" are matched literally, so "(" and "[" need no escaping.
    while [[ "$text" == *"$open"*"$close"* ]]; do
        rest=${text#*"$open"}                     # everything after the first OPEN
        text=${text%%"$open"*}${rest#*"$close"}   # text before it + text after the next CLOSE
    done
    printf '%s' "$text"
}

# clean_transcript TEXT
#   Print TEXT with annotations removed, all leading whitespace (including newlines) stripped,
#   and the first character upper-cased.
clean_transcript() {
    local text="$1"
    text=$(strip_enclosed "$text" "(" ")")
    text=$(strip_enclosed "$text" "[" "]")
    # Portable leading-whitespace trim: works in zsh and bash without extended-glob options.
    text=${text#"${text%%[![:space:]]*}"}
    #Prefer the power of zsh, but lose full POSIX compliance.
    if [ -n "$ZSH_NAME" ]; then
        # zsh: upper-case the first character only ((U) applied to a one-character subscript).
        text="${(U)text[1]}${text[2,-1]}"
    elif [ -n "$BASH" ]; then
        #Running in bash because you changed the shebang on line 1
        text="${text^}"
    else
        # Diagnostics go to stderr so they never end up inside the transcript.
        echo "Unknown shell, assuming bash compliance" >&2
        text="${text^}"
    fi
    printf '%s' "$text"
}

str=$(clean_transcript "$str")

#We have a result, now we make a few decisions.
# The transcript is written with printf '%s\n' "$str" rather than `echo $str`, so it is emitted verbatim:
# no backslash-escape or option (-n, -e) interpretation by echo, and no word splitting or globbing under bash.
#If this is somehow run in a text console (any of the desktop session variables is empty):
if [[ -z "${DISPLAY}" ]] || [[ -z "${DESKTOP_SESSION}" ]] || [[ -z "${XDG_CURRENT_DESKTOP}" ]]; then
#Not running in a known graphics environment. Using standard output:
    printf '%s\n' "$str" ; exit 0
else
#xsel or wl-copy, depending on the session type; -c on the command line selects CLIPBOARD over PRIMARY:
 case "$wm" in
    "x11")
        if [ "$CLIPBOARD" = true ]; then
          printf '%s\n' "$str" | xsel -ib
        else
          printf '%s\n' "$str" | xsel -ip
        fi
        ;;
    "wayland")
        # wl-copy is started in the background so the script does not wait on it.
        if [ "$CLIPBOARD" = true ]; then
          printf '%s\n' "$str" | wl-copy &
        else
          printf '%s\n' "$str" | wl-copy -p &
        fi
        ;;
    *)
        # Unknown session type: fall back to standard output.
        printf '%s\n' "$str"
        ;;
 esac
fi
