Spaces:

FQiao
/

SoundingStreet

Running on Zero

File size: 3,616 Bytes

3324de2

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/OIEIEIO/TangoFlux/blob/main/Demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "xiaRzuzPOP4H"
      },
      "outputs": [],
      "source": [
        "# Install TangoFlux straight from its GitHub repository.\n",
        "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
        "%pip install git+https://github.com/declare-lab/TangoFlux.git"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "Hfu3zXTDOP4J"
      },
      "outputs": [],
      "source": [
        "# Notebook display helpers\n",
        "import IPython\n",
        "from IPython.display import Audio\n",
        "\n",
        "# Audio I/O and the TangoFlux inference wrapper\n",
        "import torchaudio\n",
        "from tangoflux import TangoFluxInference\n",
        "\n",
        "# Build the inference object once; the cells below call `model.generate(...)` on it.\n",
        "model = TangoFluxInference(name='declare-lab/TangoFlux')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oFiak5QIOP4K"
      },
      "outputs": [],
      "source": [
        "# @title Generate Audio\n",
        "\n",
        "# Colab form fields: text prompt, clip duration and number of generation steps\n",
        "prompt = 'a futuristic space craft with unique engine sound' # @param {type:\"string\"}\n",
        "duration = 10 # @param {type:\"number\"}\n",
        "steps = 50 # @param {type:\"number\"}\n",
        "\n",
        "# Run the model created in the setup cell with the form values above\n",
        "audio = model.generate(prompt, duration=duration, steps=steps)\n",
        "\n",
        "# Last expression of the cell, so the audio player is rendered as the cell output\n",
        "Audio(audio, rate=44100)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# @title Generate and Save Audio\n",
        "# Reuses `model`, `torchaudio` and `Audio` from the setup cell near the top of the\n",
        "# notebook, so the model is not constructed a second time. Run that cell first.\n",
        "prompt = 'Melodic human whistling harmonizing with natural birdsong'  # @param {type:\"string\"}\n",
        "duration = 10  # @param {type:\"number\"}\n",
        "steps = 50  # @param {type:\"number\"}\n",
        "\n",
        "output_path = 'generated_audio.wav'\n",
        "# Rate passed to both the saved file and the inline player\n",
        "# (assumed to match the model's output rate -- TODO confirm).\n",
        "sample_rate = 44100\n",
        "\n",
        "# Generate the audio\n",
        "audio = model.generate(prompt, steps=steps, duration=duration)\n",
        "\n",
        "# Move to CPU and drop any autograd history so both torchaudio.save and .numpy()\n",
        "# work even if generate() were to return a GPU or grad-tracking tensor.\n",
        "audio_tensor = audio.detach().cpu()\n",
        "\n",
        "# torchaudio.save needs a 2D tensor shaped [channels, samples]\n",
        "if audio_tensor.ndim == 1:  # [samples] -> [1, samples]\n",
        "    audio_tensor = audio_tensor.unsqueeze(0)\n",
        "elif audio_tensor.ndim != 2:\n",
        "    raise ValueError(f\"Unexpected audio tensor shape: {audio.shape}\")\n",
        "\n",
        "# Save the audio as a .wav file\n",
        "torchaudio.save(output_path, audio_tensor, sample_rate=sample_rate)\n",
        "\n",
        "# Play back exactly what was written to disk\n",
        "Audio(data=audio_tensor.numpy(), rate=sample_rate)\n"
      ],
      "metadata": {
        "id": "_Z8elHyOHOQ1"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "language_info": {
      "name": "python"
    },
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "private_outputs": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}