Search code examples

How do I convert my Python app into an R Shiny app? It's a brainteaser! I can't find what the UI needs changed in R

I am new to R and trying to understand R Shiny to build UIs. I am trying to create a UI for my Python app that transcribes multiple WAV files. There are two parts below: first my Python app, and second my Shiny app in R, which uses reticulate to call my app. For some reason, though, I do not receive any output.

My Python app works perfectly and does NOT need code review. However, the R Shiny app does not execute the Python app correctly to produce the desired result. The objective is to let the user transcribe the files from the UI and decide whether they want to download the CSV.

I have a python app for transcribing files called

import os
import json
import time
# import threading
from pathlib import Path

import concurrent.futures

# from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import pandas as pd

# Replace with your api key.
# NOTE(review): a literal key in source is easy to leak; consider loading it
# from an environment variable or a config file instead.
my_api_key = "abc123"

# You can add a directory path to Path() if you want to run
# the project from a different folder at some point.
# With no argument, this resolves to the process's current working directory.
directory = Path().absolute()

# Authenticator built from the API key above.
authenticator = IAMAuthenticator(my_api_key)

# Speech-to-text client; process_audio_file() below uses this module-level object.
service = SpeechToTextV1(authenticator=authenticator)
# I used this URL.
# service.set_service_url('') 

# These two lookups call the service as soon as the module is run or imported.
# NOTE(review): neither `models` nor `model` is referenced by the functions
# visible in this file.
models = service.list_models().get_result()
#print(json.dumps(models, indent=2))

# NOTE(review): this fetches 'en-US_BroadbandModel', while the recognize call
# in process_audio_file() requests "en-US_NarrowbandModel" - confirm which is intended.
model = service.get_model('en-US_BroadbandModel').get_result()
#print(json.dumps(model, indent=2))

# get data to a csv
########################RUN THIS PART SECOND#####################################

def process_data(json_data, output_path):
    """Flatten a speech-to-text result, add sentiment scores, and save it as CSV.

    Args:
        json_data: Mapping with a 'results' entry (each item holding an
            "alternatives" list of dicts with "transcript" and "confidence")
            and a "speaker_labels" entry.
        output_path: Path-like object with a ``stem`` attribute; the CSV is
            written to this location.
    """
    print(f"Processing: {output_path.stem}")

    columns = ["transcript", "confidence"]

    # One row per alternative, across all results, in response order.
    rows = []
    for result in json_data.get('results'):
        for alternative in result.get("alternatives"):
            rows.append([alternative[columns[0]], alternative[columns[1]]])
    transcripts = pd.DataFrame(data=rows, columns=columns)

    # Speaker labels, without the columns that are not wanted in the output.
    speakers = pd.DataFrame(json_data.get("speaker_labels"))
    speakers = speakers.drop(["final", "confidence"], axis=1)

    # Rows are paired by index; only indices present on both sides survive.
    combined = pd.merge(transcripts, speakers, left_index=True, right_index=True)

    # sentiment
    print(f"Getting sentiment for: {output_path.stem}")
    spoken_text = combined["transcript"]

    analyzer = SentimentIntensityAnalyzer()
    sentiment = pd.DataFrame([analyzer.polarity_scores(line) for line in spoken_text])

    text_with_scores = pd.merge(
        spoken_text.to_frame(name="Text"),
        sentiment,
        left_index=True,
        right_index=True,
    )

    report = pd.merge(combined, text_with_scores, left_index=True, right_index=True)

    # "Text" duplicates the "transcript" column, so it is removed again.
    report.drop("Text", axis=1, inplace=True)

    friendly_names = {
        "neg": "Negative",
        "pos": "Positive",
        "neu": "Neutral",
    }
    report.rename(columns=friendly_names, inplace=True)

    # This is the name of the output csv file
    report.to_csv(output_path, index=False)

def process_audio_file(filename, output_type = "csv"):
    """Send one audio file to the speech-to-text service.

    Args:
        filename: Path of the audio file, joined onto the module-level
            ``directory``.
        output_type: Extension used to build the output file name.

    Returns:
        A two-item list ``[data, out_path]``: the result of the recognize
        call and the path where the processed output should be written.
    """
    audio_file_path = directory.joinpath(filename)

    # Update output path to consider `output_type` parameter.
    out_path = directory.joinpath(f"{audio_file_path.stem}.{output_type}")

    print(f"Current file: '{filename}'")

    with open(audio_file_path, "rb") as audio_file:
        # The call was left unclosed in the original. `.get_result()` unwraps
        # the response the same way the module-level list_models()/get_model()
        # calls do, so process_data() can call `.get()` on it.
        data = service.recognize(
            audio=audio_file,
            speaker_labels=True,
            content_type="audio/wav",
            inactivity_timeout=-1,
            model="en-US_NarrowbandModel",
            continuous=True,
        ).get_result()

    print(f"Speech-to-text complete for: '{filename}'")

    # Return data and output path as collection.
    return [data, out_path]

def main():
    """Transcribe every .wav file under ``directory`` and write one CSV per file.

    Files are sent to the service from a thread pool; each finished result is
    passed to process_data() together with its output path. An exception
    raised while transcribing a file propagates out of this function.
    """
    print("Running main()...")

    # Default num. workers == min(32, os.cpu_count() + 4)
    # os.cpu_count() may return None, so fall back to 1 before adding.
    n_workers = (os.cpu_count() or 1) + 2

    # Create generator for all .wav files in folder (and subfolders).
    file_gen = directory.glob("**/*.wav")

    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = [executor.submit(process_audio_file, f) for f in file_gen]
        for future in concurrent.futures.as_completed(futures):
            # The original discarded this result, so no CSV was ever produced.
            data, out_path = future.result()
            process_data(data, out_path)

if __name__ == "__main__":
    # Script entry point: run the batch and report the elapsed wall-clock time.
    print("Program to process audio files has started.")

    t_start = time.perf_counter()

    # The call was missing, so the timer previously wrapped nothing.
    main()

    t_stop = time.perf_counter()
    print(f"Done! Processing completed in {t_stop - t_start} seconds.")

In Rstudio, I tried -

R.UI file

library(shiny) # for fluidPage, fileInput and the other UI/server functions
library(reticulate) # for reading Python code
library(formattable) # for adding color to tables
library(shinybusy) # for busy bar
library(DT) # for dataTableOutput


# Page layout: busy bar, .wav picker, download button slot, transcript table.
ui <- fluidPage(
  add_busy_bar(color = "#5d98ff"),
  fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav"),
  uiOutput("downloadData"), # rendered by output$downloadData in the server
  DT::dataTableOutput("transcript") # rendered by output$transcript in the server
)

R.Server file

# Server logic: transcribe the uploaded file, show the result as a colored
# table, and offer it as a CSV download.
server <- function(input, output) {
  # .WAV File Selector ------------------------------------------------------
  file <- reactive({
    file <- input$wavFile # Get file from user input
    gsub("\\\\","/",file$datapath) # Access the file path. Convert back slashes to forward slashes.
  })

  # Transcribe and Clean ----------------------------------------------------
  transcript <- reactive({
    req(input$wavFile) # Require a file before proceeding
    # NOTE(review): the script path is blank in the original post; it must point
    # at the Python file that defines transcribe() for this call to succeed.
    source_python('') # Load the Python function           # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    transcript <- data.frame(transcribe(file())) # Transcribe the file  # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    # load('transcript.rdata') # Loads a dummy transcript               # UNCOMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
    transcript$transcript <- unlist(transcript$transcript) # Transcript field comes in as a list. Unlist it.
    # Remove empty lines. The original expression was garbled; the bracket
    # count matches a !(is.na(...)) test on the confidence column.
    transcript <- transcript[which(!is.na(transcript$confidence)), ]
    # str_to_title comes from stringr, which is not attached above, so it is
    # called with its namespace.
    names(transcript) <- stringr::str_to_title(names(transcript)) # Capitalize column headers
    transcript # Return the transcript
  })

  # Use a server-side download button ---------------------------------------
  # that the download button only appears after transcription
  output$downloadData <- renderUI({
    req(transcript()) # stay empty until a transcript is available
    downloadButton("handleDownload","Download CSV")
  })

  output$handleDownload <- downloadHandler(
    filename = function() {
      paste('Transcript ',Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      write.csv(transcript(), file, row.names = FALSE)
    }
  )

  # Transcript table --------------------------------------------------------
  output$transcript <- DT::renderDataTable({
    # formattable() applies the color tiles; as.datatable() converts the
    # result into a DT widget and forwards the remaining arguments.
    as.datatable(
      formattable(
        transcript(),
        list(Confidence = color_tile('#ffffff','#a2b3c8'),
             Negative = color_tile('#ffffff', '#e74446'),
             Positive = color_tile('#ffffff', "#499650"))
      ),
      rownames = FALSE, options = list(paging = FALSE)
    )
  })
  # END ---------------------------------------------------------------------
}


  • In Shiny, you need to pass arguments to the Python script properly. An easy workaround is to define a function in the Python script and call that function from Shiny.

    Here is your modified python script (edited process_data function and added run_script function) -

    import os
    import json
    import time
    # import threading
    from pathlib import Path
    import concurrent.futures
    # from os.path import join, dirname
    from ibm_watson import SpeechToTextV1
    from ibm_watson.websocket import RecognizeCallback, AudioSource
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    import pandas as pd
    # Replace with your api key.
    # NOTE(review): a literal key in source is easy to leak; consider loading it
    # from an environment variable or a config file instead.
    my_api_key = "api_key"
    # You can add a directory path to Path() if you want to run
    # the project from a different folder at some point.
    # With no argument, this resolves to the process's current working directory.
    directory = Path().absolute()
    # Authenticator built from the API key above.
    authenticator = IAMAuthenticator(my_api_key)
    # Speech-to-text client; process_audio_file() below uses this module-level object.
    service = SpeechToTextV1(authenticator=authenticator)
    # I used this URL.
    # service.set_service_url('') 
    # These two lookups call the service as soon as the script is loaded.
    # NOTE(review): neither `models` nor `model` is referenced by the functions
    # visible in this script.
    models = service.list_models().get_result()
    #print(json.dumps(models, indent=2))
    # NOTE(review): this fetches 'en-US_BroadbandModel', while the recognize call
    # in process_audio_file() requests "en-US_NarrowbandModel" - confirm which is intended.
    model = service.get_model('en-US_BroadbandModel').get_result()
    #print(json.dumps(model, indent=2))
    # get data to a csv
    ########################RUN THIS PART SECOND#####################################
    def process_data(json_data):
        """Flatten a speech-to-text result and attach sentiment scores.

        Args:
            json_data: Mapping with a 'results' entry (each item holding an
                "alternatives" list of dicts with "transcript" and
                "confidence") and a "speaker_labels" entry.

        Returns:
            A pandas DataFrame with the transcript, confidence, speaker-label
            columns and the sentiment scores (with "neg"/"pos"/"neu" renamed
            to "Negative"/"Positive"/"Neutral").
        """
        cols = ["transcript", "confidence"]
        dfdata = [[t[cols[0]], t[cols[1]]] for r in json_data.get('results') for t in r.get("alternatives")]
        df0 = pd.DataFrame(data=dfdata, columns=cols)
        df1 = pd.DataFrame(json_data.get("speaker_labels")).drop(["final", "confidence"], axis=1)
        # Rows are paired by index; only indices present on both sides survive.
        test3 = pd.merge(df0, df1, left_index=True, right_index=True)

        # sentiment
        transcript = test3["transcript"]
        analyzer = SentimentIntensityAnalyzer()
        scores = [analyzer.polarity_scores(txt) for txt in transcript]
        data = transcript.to_frame(name="Text")
        data2 = pd.DataFrame(scores)
        final_dataset = pd.merge(data, data2, left_index=True, right_index=True)
        test4 = pd.merge(test3, final_dataset, left_index=True, right_index=True)

        # "Text" duplicates the "transcript" column, so it is removed again.
        test4.drop("Text", axis=1, inplace=True)
        test4.rename(columns={
                "neg": "Negative",
                "pos": "Positive",
                "neu": "Neutral",
                }, inplace=True)

        # The CSV is no longer written here, so the table has to be handed
        # back to the caller; without this the function returned None.
        return test4
    def process_audio_file(filename, output_type = "csv"):
        """Send one audio file to the speech-to-text service.

        Args:
            filename: Path of the audio file, joined onto the module-level
                ``directory``.
            output_type: Extension used to build the output file name.

        Returns:
            A two-item list ``[data, out_path]``: the result of the recognize
            call and the path where processed output could be written.
        """
        audio_file_path = directory.joinpath(filename)
        # Update output path to consider `output_type` parameter.
        out_path = directory.joinpath(f"{audio_file_path.stem}.{output_type}")
        print(f"Current file: '{filename}'")
        with open(audio_file_path, "rb") as audio_file:
            # The call was left unclosed in the original. `.get_result()`
            # unwraps the response the same way the module-level
            # list_models()/get_model() calls do, so process_data() can call
            # `.get()` on it.
            data = service.recognize(
                audio=audio_file,
                speaker_labels=True,
                content_type="audio/wav",
                inactivity_timeout=-1,
                model="en-US_NarrowbandModel",
                continuous=True,
            ).get_result()
        print(f"Speech-to-text complete for: '{filename}'")
        # Return data and output path as collection.
        return [data, out_path]
    def main():
        """Transcribe every .wav file under ``directory`` from a thread pool.

        Each finished result is passed to process_data(); when that call hands
        back a table, it is saved as CSV at the output path computed for the
        file. An exception raised while transcribing a file propagates out of
        this function.
        """
        print("Running main()...")
        # Default num. workers == min(32, os.cpu_count() + 4)
        # os.cpu_count() may return None, so fall back to 1 before adding.
        n_workers = (os.cpu_count() or 1) + 2
        # Create generator for all .wav files in folder (and subfolders).
        file_gen = directory.glob("**/*.wav")
        with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
            futures = [executor.submit(process_audio_file, f) for f in file_gen]
            for future in concurrent.futures.as_completed(futures):
                # The original discarded this result entirely.
                data, out_path = future.result()
                table = process_data(data)
                # process_data() does not write the CSV itself in this script,
                # so save whatever table it returns.
                if table is not None:
                    table.to_csv(out_path, index=False)
    def run_script(filename):
        """Transcribe a single audio file and return its processed table.

        This is the entry point the Shiny server calls once per uploaded file.

        Args:
            filename: Path of the audio file to transcribe.

        Returns:
            Whatever process_data() returns for the file's recognition result.
        """
        # The output path is only needed when writing to disk, so it is ignored.
        data, _ = process_audio_file(filename)
        return process_data(data)

    Shiny code

    In the server file, call the run_script function rather than transcribe. Make sure that the Python file is in the working directory. I also corrected some typos in output$transcript.

    library(shiny) # for fluidPage, fileInput and the other UI/server functions
    library(reticulate) # for reading Python code
    library(formattable) # for adding color to tables
    library(shinybusy) # for busy bar
    library(DT) # for dataTableOutput
    # Page layout: busy bar, multi-file .wav picker, download button slot, transcript table.
    ui <- fluidPage(
      add_busy_bar(color = "#5d98ff"),
      fileInput("wavFile", "SELECT .WAV FILE", accept = ".wav", multiple = TRUE),
      uiOutput("downloadData"), # rendered by output$downloadData in the server
      DT::dataTableOutput("transcript") # rendered by output$transcript in the server
    )
    # Server logic: transcribe every uploaded file through the Python
    # run_script() function, show the combined result as a colored table, and
    # offer it as a CSV download.
    server <- function(input, output) {
      # NOTE(review): the script name is missing from the original post.
      # "transcribe.py" is a placeholder - point this at the Python file that
      # defines run_script(), located in the app's working directory.
      python_script <- "transcribe.py"
      source_python(python_script) # Load run_script() into the R session

      # .WAV File Selector ------------------------------------------------------
      file <- reactive({
        req(input$wavFile) # Require a file before proceeding
        files <- input$wavFile # Get file from user input
        # Access the file paths. Convert back slashes to forward slashes.
        # gsub is vectorized, so all uploaded paths are handled in one call.
        gsub("\\\\", "/", files$datapath)
      })

      # Transcribe and Clean ----------------------------------------------------
      transcript <- reactive({
        paths <- file()
        # seq_along() yields nothing for an empty vector, unlike 1:length().
        pieces <- lapply(seq_along(paths), function(j) {
          t0 <- Sys.time()
          one <- run_script(paths[j]) # Transcribe the file  # COMMENT LINE OUT WHEN TESTING NON-TRANSCRIPTION FUNCTIONALITY
          # Fixed units keep the Time column comparable across files.
          t1 <- difftime(Sys.time(), t0, units = "secs")
          # Assigning a single value to a zero-row frame is an error, so only
          # tag frames that actually contain rows.
          if (nrow(one) > 0) {
            one$File <- j
            one$Time <- t1
          }
          one
        })
        do.call(rbind, pieces) # Return the combined transcript
      })

      # Use a server-side download button ---------------------------------------
      # that the download button only appears after transcription
      output$downloadData <- renderUI({
        req(transcript()) # stay empty until a transcript is available
        downloadButton("handleDownload","Download CSV")
      })

      output$handleDownload <- downloadHandler(
        filename = function() {
          paste('Transcript ',Sys.Date(), ".csv", sep = "")
        },
        content = function(file) {
          write.csv(transcript(), file, row.names = FALSE)
        }
      )

      # Transcript table --------------------------------------------------------
      output$transcript <- DT::renderDataTable({
        shown <- transcript()
        tiles <- list(Confidence = color_tile('#ffffff','#a2b3c8'),
                      Negative = color_tile('#ffffff', '#e74446'),
                      Positive = color_tile('#ffffff', "#499650"))
        # Keep only formatters whose column exists in the table. The Python
        # process_data() names its column "confidence" in lower case, so the
        # Confidence tile applies only if the column is renamed to match.
        tiles <- tiles[names(tiles) %in% names(shown)]
        # as.datatable() converts the formattable into a DT widget and
        # forwards the remaining arguments.
        as.datatable(
          formattable(shown, tiles),
          rownames = FALSE, options = list(paging = FALSE)
        )
      })
      # END ---------------------------------------------------------------------
    }
    # Return a Shiny app object
    shinyApp(ui = ui, server = server)

    Note that Shiny downloads work only in web browsers, so you must open the app in a web browser. (Screenshot not shown.)