Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,616 Bytes
3324de2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/OIEIEIO/TangoFlux/blob/main/Demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "xiaRzuzPOP4H"
},
"outputs": [],
"source": [
"# Install TangoFlux straight from GitHub. %pip (rather than !pip) installs into\n",
"# the environment of the running kernel.\n",
"%pip install git+https://github.com/declare-lab/TangoFlux.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "Hfu3zXTDOP4J"
},
"outputs": [],
"source": [
"import IPython\n",
"import torchaudio\n",
"from tangoflux import TangoFluxInference\n",
"from IPython.display import Audio\n",
"\n",
"# Identifier handed to TangoFluxInference through its `name` argument.\n",
"MODEL_NAME = 'declare-lab/TangoFlux'\n",
"\n",
"# Build the inference wrapper once; the generation cells below call model.generate().\n",
"model = TangoFluxInference(name=MODEL_NAME)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oFiak5QIOP4K"
},
"outputs": [],
"source": [
"# @title Generate Audio\n",
"\n",
"# Colab form fields: edit them in the form panel or directly in the code.\n",
"prompt = 'a futuristic space craft with unique engine sound' # @param {type:\"string\"}\n",
"duration = 10 # @param {type:\"number\"}\n",
"# `steps` is a count, so the form field only accepts whole numbers.\n",
"steps = 50 # @param {type:\"integer\"}\n",
"\n",
"# Playback rate passed to the notebook audio widget.\n",
"SAMPLE_RATE = 44100\n",
"\n",
"# `model` is the TangoFluxInference instance created in the setup cell above.\n",
"audio = model.generate(prompt, steps=steps, duration=duration)\n",
"\n",
"# Last expression of the cell, so the audio player is rendered as its output.\n",
"Audio(data=audio, rate=SAMPLE_RATE)"
]
},
{
"cell_type": "code",
"source": [
"# @title Generate Audio\n",
"# Reuses `model`, `torchaudio` and `Audio` from the setup cell above rather than\n",
"# re-importing them and constructing a second TangoFluxInference instance.\n",
"prompt = 'Melodic human whistling harmonizing with natural birdsong' # @param {type:\"string\"}\n",
"duration = 10 # @param {type:\"number\"}\n",
"# `steps` is a count, so the form field only accepts whole numbers.\n",
"steps = 50 # @param {type:\"integer\"}\n",
"\n",
"# One sample rate for both the saved file and the in-notebook player.\n",
"SAMPLE_RATE = 44100\n",
"OUTPUT_PATH = 'generated_audio.wav'\n",
"\n",
"# Generate the audio\n",
"audio = model.generate(prompt, steps=steps, duration=duration)\n",
"\n",
"# Normalise to a 2D tensor laid out as [channels, samples].\n",
"if audio.ndim == 1:\n",
"    # 1D input is treated as mono: add a leading channel dimension.\n",
"    audio_tensor = audio.unsqueeze(0)\n",
"elif audio.ndim == 2:\n",
"    audio_tensor = audio\n",
"else:\n",
"    raise ValueError(f\"Unexpected audio tensor shape: {audio.shape}\")\n",
"\n",
"# .numpy() needs a CPU tensor that does not track gradients; these calls are\n",
"# cheap when the tensor already satisfies that.\n",
"audio_tensor = audio_tensor.detach().cpu()\n",
"\n",
"# Save the audio as a .wav file\n",
"torchaudio.save(OUTPUT_PATH, audio_tensor, sample_rate=SAMPLE_RATE)\n",
"\n",
"# Play back exactly what was written to disk.\n",
"Audio(data=audio_tensor.numpy(), rate=SAMPLE_RATE)\n"
],
"metadata": {
"id": "_Z8elHyOHOQ1"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"language_info": {
"name": "python"
},
"colab": {
"provenance": [],
"machine_shape": "hm",
"private_outputs": true,
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 0
} |