Commit 7eead14

mthrok authored and facebook-github-bot committed
Import torchaudio #1066 4406a6b
Summary: Import up to #1066
Reviewed By: cpuhrsch
Differential Revision: D25373068
fbshipit-source-id: 890d36a25259b93428b3037c3123ff5a2cacfa04
1 parent 9caa932 commit 7eead14

34 files changed: +569 -287 lines

.circleci/build_docs/commit_docs.sh

Lines changed: 8 additions & 6 deletions

@@ -13,17 +13,19 @@ fi
 src=$1
 target=$2
 
-set -ex
 echo "committing docs from ${src} to ${target}"
 
 git checkout gh-pages
-rm -rf docs/"${target}"/*
-cp -r "${src}"/build/html/* docs/"${target}"
+mkdir -p ./"${target}"
+rm -rf ./"${target}"/*
+cp -r "${src}/build/html/"* ./"$target"
 if [ "${target}" == "master" ]; then
-  rm -rf docs/_static/*
-  cp -r "${src}"/build/html/_static/* docs/_static
+  mkdir -p ./_static
+  rm -rf ./_static/*
+  cp -r "${src}/build/html/_static/"* ./_static
+  git add --all ./_static || true
 fi
-git add docs || true
+git add --all ./"${target}" || true
 git config user.email "[email protected]"
 git config user.name "pytorchbot"
 # If there aren't changes, don't make a commit; push is no-op

.circleci/config.yml

Lines changed: 11 additions & 0 deletions

@@ -684,6 +684,16 @@ jobs:
       - attach_workspace:
           at: ~/workspace
       - designate_upload_channel
+      - run:
+          name: Generate netrc
+          command: |
+            # set credentials for https pushing
+            # requires the org-member context
+            cat > ~/.netrc <<DONE
+            machine github.com
+            login pytorchbot
+            password ${GITHUB_PYTORCHBOT_TOKEN}
+            DONE
       - checkout
       - run:
           name: Upload docs
@@ -785,6 +795,7 @@ workflows:
           requires:
             - binary_linux_wheel_py3.8
       - upload_docs:
+          context: org-member
           filters:
             branches:
               only:
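The added `Generate netrc` step writes git's standard netrc credential format so that the later docs push over HTTPS authenticates as `pytorchbot`. As a rough sanity-check sketch (not part of this commit), Python's standard-library `netrc` module parses the same file; the host and token here are simply whatever the CI step wrote:

```python
import netrc
import os

netrc_path = os.path.expanduser("~/.netrc")

# The netrc module refuses world/group-readable files that contain passwords,
# so keep the permissions restrictive before parsing.
os.chmod(netrc_path, 0o600)

creds = netrc.netrc(netrc_path)
auth = creds.authenticators("github.com")  # (login, account, password) or None
if auth is not None:
    login, _account, _password = auth      # _password holds the bot token; never print it
    print(f"HTTPS pushes will authenticate as {login}")
```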

.circleci/config.yml.in

Lines changed: 10 additions & 0 deletions

@@ -684,6 +684,16 @@ jobs:
       - attach_workspace:
           at: ~/workspace
       - designate_upload_channel
+      - run:
+          name: Generate netrc
+          command: |
+            # set credentials for https pushing
+            # requires the org-member context
+            cat > ~/.netrc <<DONE
+            machine github.com
+            login pytorchbot
+            password ${GITHUB_PYTORCHBOT_TOKEN}
+            DONE
       - checkout
       - run:
           name: Upload docs

.circleci/regenerate.py

Lines changed: 3 additions & 2 deletions

@@ -79,7 +79,7 @@ def build_doc_job(filter_branch):
     job = {
         "name": "build_docs",
         "python_version": "3.8",
-        "requires": ["binary_linux_wheel_py3.8"],
+        "requires": ["binary_linux_wheel_py3.8", ],
     }
 
     if filter_branch:
@@ -90,8 +90,9 @@ def build_doc_job(filter_branch):
 def upload_doc_job(filter_branch):
     job = {
         "name": "upload_docs",
+        "context": "org-member",
         "python_version": "3.8",
-        "requires": ["build_docs"],
+        "requires": ["build_docs", ],
     }
 
     if filter_branch:
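For orientation, the job dicts returned here end up under the `workflows:` section of the generated `config.yml` (compare the `upload_docs` entry with `context: org-member` in the config.yml diff above). A standalone illustration of that mapping, using PyYAML purely for display; this is not the actual template rendering that `regenerate.py` performs:

```python
import yaml  # PyYAML, used here only to show the shape of the output

job = {
    "name": "upload_docs",
    "context": "org-member",
    "python_version": "3.8",
    "requires": ["build_docs", ],
}

# In the workflow list the job becomes a one-key mapping keyed by its name,
# with the remaining fields as parameters (cf. "- upload_docs:" / "context: org-member").
name = job.pop("name")
print(yaml.safe_dump([{name: job}], default_flow_style=False, sort_keys=False))
```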

README.md

Lines changed: 24 additions & 24 deletions

@@ -10,29 +10,29 @@ the audio domain. By supporting PyTorch, torchaudio follows the same philosophy
 of providing strong GPU acceleration, having a focus on trainable features through
 the autograd system, and having consistent style (tensor names and dimension names).
 Therefore, it is primarily a machine learning library and not a general signal
-processing library. The benefits of PyTorch is be seen in torchaudio through
+processing library. The benefits of PyTorch can be seen in torchaudio through
 having all the computations be through PyTorch operations which makes it easy
 to use and feel like a natural extension.
 
-- [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/)
+- [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/stable/)
   - Load the following formats into a torch Tensor using SoX
     - mp3, wav, aac, ogg, flac, avr, cdda, cvs/vms,
     - aiff, au, amr, mp2, mp4, ac3, avi, wmv,
     - mpeg, ircam and any other format supported by libsox.
-  - [Kaldi (ark/scp)](http://pytorch.org/audio/kaldi_io.html)
-- [Dataloaders for common audio datasets (VCTK, YesNo)](http://pytorch.org/audio/datasets.html)
+  - [Kaldi (ark/scp)](http://pytorch.org/audio/stable/kaldi_io.html)
+- [Dataloaders for common audio datasets (VCTK, YesNo)](http://pytorch.org/audio/stable/datasets.html)
 - Common audio transforms
-  - [Spectrogram, AmplitudeToDB, MelScale, MelSpectrogram, MFCC, MuLawEncoding, MuLawDecoding, Resample](http://pytorch.org/audio/transforms.html)
+  - [Spectrogram, AmplitudeToDB, MelScale, MelSpectrogram, MFCC, MuLawEncoding, MuLawDecoding, Resample](http://pytorch.org/audio/stable/transforms.html)
 - Compliance interfaces: Run code using PyTorch that align with other libraries
-  - [Kaldi: spectrogram, fbank, mfcc, resample_waveform](https://pytorch.org/audio/compliance.kaldi.html)
+  - [Kaldi: spectrogram, fbank, mfcc, resample_waveform](https://pytorch.org/audio/stable/compliance.kaldi.html)
 
 Dependencies
 ------------
 * PyTorch (See below for the compatible versions)
 * libsox v14.3.2 or above (only required when building from source)
 * [optional] vesis84/kaldi-io-for-python commit cb46cb1f44318a5d04d4941cf39084c5b021241e or above
 
-The following is the corresponding ``torchaudio`` versions and supported Python versions.
+The following are the corresponding ``torchaudio`` versions and supported Python versions.
 
 | ``torch``                | ``torchaudio``           | ``python``                      |
 | ------------------------ | ------------------------ | ------------------------------- |
@@ -46,7 +46,7 @@ The following is the corresponding ``torchaudio`` versions and supported Python
 Installation
 ------------
 
-### Binary Distibutions
+### Binary Distributions
 
 To install the latest version using anaconda, run:
 
@@ -127,7 +127,7 @@ BUILD_SOX=1 MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py i
 ```
 
 This is known to work on linux and unix distributions such as Ubuntu and CentOS 7 and macOS.
-If you try this on a new system and found a solution to make it work, feel free to share it by opening and issue.
+If you try this on a new system and find a solution to make it work, feel free to share it by opening an issue.
 
 #### Troubleshooting
 
@@ -195,16 +195,16 @@ Conventions
 
 With torchaudio being a machine learning library and built on top of PyTorch,
 torchaudio is standardized around the following naming conventions. Tensors are
-assumed to have channel as the first dimension and time as the last
+assumed to have channels as the first dimension and time as the last
 dimension (when applicable). This makes it consistent with PyTorch's dimensions.
 For size names, the prefix `n_` is used (e.g. "a tensor of size (`n_freq`, `n_mel`)")
 whereas dimension names do not have this prefix (e.g. "a tensor of
-dimension (channel, time)")
+dimension (channels, time)")
 
-* `waveform`: a tensor of audio samples with dimensions (channel, time)
+* `waveform`: a tensor of audio samples with dimensions (channels, time)
 * `sample_rate`: the rate of audio dimensions (samples per second)
-* `specgram`: a tensor of spectrogram with dimensions (channel, freq, time)
-* `mel_specgram`: a mel spectrogram with dimensions (channel, mel, time)
+* `specgram`: a tensor of spectrogram with dimensions (channels, freq, time)
+* `mel_specgram`: a mel spectrogram with dimensions (channels, mel, time)
 * `hop_length`: the number of samples between the starts of consecutive frames
 * `n_fft`: the number of Fourier bins
 * `n_mel`, `n_mfcc`: the number of mel and MFCC bins
@@ -216,16 +216,16 @@ dimension (channel, time)")
 
 Transforms expect and return the following dimensions.
 
-* `Spectrogram`: (channel, time) -> (channel, freq, time)
-* `AmplitudeToDB`: (channel, freq, time) -> (channel, freq, time)
-* `MelScale`: (channel, freq, time) -> (channel, mel, time)
-* `MelSpectrogram`: (channel, time) -> (channel, mel, time)
-* `MFCC`: (channel, time) -> (channel, mfcc, time)
-* `MuLawEncode`: (channel, time) -> (channel, time)
-* `MuLawDecode`: (channel, time) -> (channel, time)
-* `Resample`: (channel, time) -> (channel, time)
-* `Fade`: (channel, time) -> (channel, time)
-* `Vol`: (channel, time) -> (channel, time)
+* `Spectrogram`: (channels, time) -> (channels, freq, time)
+* `AmplitudeToDB`: (channels, freq, time) -> (channels, freq, time)
+* `MelScale`: (channels, freq, time) -> (channels, mel, time)
+* `MelSpectrogram`: (channels, time) -> (channels, mel, time)
+* `MFCC`: (channels, time) -> (channel, mfcc, time)
+* `MuLawEncode`: (channels, time) -> (channels, time)
+* `MuLawDecode`: (channels, time) -> (channels, time)
+* `Resample`: (channels, time) -> (channels, time)
+* `Fade`: (channels, time) -> (channels, time)
+* `Vol`: (channels, time) -> (channels, time)
 
 Complex numbers are supported via tensors of dimension (..., 2), and torchaudio provides `complex_norm` and `angle` to convert such a tensor into its magnitude and phase. Here, and in the documentation, we use an ellipsis "..." as a placeholder for the rest of the dimensions of a tensor, e.g. optional batching and channel dimensions.
 
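To make the (channels, time) convention in the README concrete, here is a small sketch (the audio path is a placeholder) of a waveform moving through two of the transforms listed above:

```python
import torchaudio

# torchaudio.load returns (waveform, sample_rate); waveform has shape (channels, time).
waveform, sample_rate = torchaudio.load("example.wav")  # placeholder file

spectrogram = torchaudio.transforms.Spectrogram(n_fft=400)(waveform)
# -> (channels, freq, time) with freq = n_fft // 2 + 1 = 201

mel_spectrogram = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate)(waveform)
# -> (channels, mel, time)

print(waveform.shape, spectrogram.shape, mel_spectrogram.shape)
```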

build_tools/setup_helpers/extension.py

Lines changed: 2 additions & 0 deletions

@@ -89,6 +89,8 @@ def _get_extra_objects():
         'libvorbisfile.a',
         'libvorbis.a',
         'libogg.a',
+        'libopencore-amrnb.a',
+        'libopencore-amrwb.a',
     ]
     for lib in libs:
         objs.append(str(_TP_INSTALL_DIR / 'lib' / lib))
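The two `libopencore-amr*` archives join the other statically built codecs that get handed straight to the linker. A simplified, generic sketch of that setuptools pattern (not the actual torchaudio `extension.py`; the paths and source list are placeholders):

```python
from pathlib import Path
from setuptools import Extension

_TP_INSTALL_DIR = Path("third_party/install")  # placeholder third-party install prefix


def _get_extra_objects():
    # Prebuilt static archives are passed verbatim to the linker via extra_objects,
    # so the resulting extension bundles the codecs (including AMR-NB/AMR-WB).
    libs = [
        'libsox.a',
        'libopencore-amrnb.a',
        'libopencore-amrwb.a',
    ]
    return [str(_TP_INSTALL_DIR / 'lib' / lib) for lib in libs]


ext = Extension(
    name='torchaudio._torchaudio',
    sources=['torchaudio/csrc/pybind.cpp'],  # illustrative source list
    extra_objects=_get_extra_objects(),
)
```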

docs/source/datasets.rst

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@ torchaudio.datasets
 ====================
 
 All datasets are subclasses of :class:`torch.utils.data.Dataset`
-i.e, they have ``__getitem__`` and ``__len__`` methods implemented.
+and have ``__getitem__`` and ``__len__`` methods implemented.
 Hence, they can all be passed to a :class:`torch.utils.data.DataLoader`
 which can load multiple samples parallelly using ``torch.multiprocessing`` workers.
 For example: ::
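As a quick illustration of the sentence above (separate from the literal example block in the .rst file, which this diff does not show), any torchaudio dataset can be fed to a `DataLoader`; `YESNO` is used here only because it is small, and `download=True` fetches it:

```python
import os

import torch
import torchaudio

os.makedirs("./data", exist_ok=True)
yesno = torchaudio.datasets.YESNO("./data", download=True)

# __getitem__ returns (waveform, sample_rate, labels); batch_size=1 sidesteps
# collating waveforms of different lengths.
loader = torch.utils.data.DataLoader(yesno, batch_size=1, shuffle=True, num_workers=2)

for waveform, sample_rate, labels in loader:
    print(waveform.shape, sample_rate, labels)
    break
```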

docs/source/functional.rst

Lines changed: 10 additions & 0 deletions

@@ -124,11 +124,21 @@ vad
 
 .. autofunction:: spectrogram
 
+:hidden:`griffinlim`
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: griffinlim
+
 :hidden:`amplitude_to_DB`
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. autofunction:: amplitude_to_DB
 
+:hidden:`DB_to_amplitude`
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: DB_to_amplitude
+
 :hidden:`create_fb_matrix`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
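`DB_to_amplitude` is the inverse of `amplitude_to_DB`. A small hedged sketch of the round trip on a power spectrogram (the argument values follow the usual power-quantity convention; they are not prescribed by this commit):

```python
import torch
import torchaudio.functional as F

# Fake (channel, freq, time) power spectrogram, kept away from zero to avoid clamping at amin.
power_spec = (torch.rand(1, 201, 100) + 0.1) ** 2

# multiplier=10 for power quantities, amin guards log(0), db_multiplier=0 means a reference of 1.0
db = F.amplitude_to_DB(power_spec, multiplier=10.0, amin=1e-10, db_multiplier=0.0, top_db=None)

# Inverse: ref=1.0 matches db_multiplier=0.0 above, power=1.0 yields a power spectrogram again
recovered = F.DB_to_amplitude(db, ref=1.0, power=1.0)

print(torch.allclose(power_spec, recovered, atol=1e-5))
```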

test/torchaudio_unittest/datasets/librispeech_test.py

Lines changed: 12 additions & 5 deletions

@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 from torchaudio.datasets import librispeech
 
@@ -91,11 +92,7 @@ def tearDownClass(cls):
         # In case of test failure
         librispeech.LIBRISPEECH._ext_audio = '.flac'
 
-    def test_librispeech(self):
-        librispeech.LIBRISPEECH._ext_audio = '.wav'
-        dataset = librispeech.LIBRISPEECH(self.root_dir)
-        print(dataset._path)
-
+    def _test_librispeech(self, dataset):
         num_samples = 0
         for i, (
                 data, sample_rate, utterance, speaker_id, chapter_id, utterance_id
@@ -110,3 +107,13 @@ def test_librispeech(self):
 
         assert num_samples == len(self.samples)
         librispeech.LIBRISPEECH._ext_audio = '.flac'
+
+    def test_librispeech_str(self):
+        librispeech.LIBRISPEECH._ext_audio = '.wav'
+        dataset = librispeech.LIBRISPEECH(self.root_dir)
+        self._test_librispeech(dataset)
+
+    def test_librispeech_path(self):
+        librispeech.LIBRISPEECH._ext_audio = '.wav'
+        dataset = librispeech.LIBRISPEECH(Path(self.root_dir))
+        self._test_librispeech(dataset)
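Both new tests run the same checks with a `str` root and a `pathlib.Path` root, i.e. the dataset constructor is expected to accept any path-like object. A generic sketch of the normalization such a constructor can do (illustrative only, not the LIBRISPEECH implementation):

```python
import os
from pathlib import Path


def normalize_root(root):
    # os.fspath accepts str, bytes, and objects implementing __fspath__ (e.g. pathlib.Path)
    # and returns a plain str/bytes, so later os.path.* and string operations behave the same.
    return os.fspath(root)


for root in ("data/LibriSpeech", Path("data/LibriSpeech")):
    print(type(root).__name__, "->", repr(normalize_root(root)))
```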

test/torchaudio_unittest/datasets/libritts_test.py

Lines changed: 10 additions & 2 deletions

@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 from torchaudio.datasets.libritts import LIBRITTS
 
@@ -47,8 +48,7 @@ def setUpClass(cls):
             with open(path_normalized, 'w') as file_:
                 file_.write(cls.normalized_text)
 
-    def test_libritts(self):
-        dataset = LIBRITTS(self.root_dir)
+    def _test_libritts(self, dataset):
         n_ites = 0
         for i, (waveform,
                 sample_rate,
@@ -69,3 +69,11 @@ def test_libritts(self):
             assert utterance_id == f'{"_".join(str(u) for u in expected_ids[-4:])}'
             n_ites += 1
         assert n_ites == len(self.utterance_ids)
+
+    def test_libritts_str(self):
+        dataset = LIBRITTS(self.root_dir)
+        self._test_libritts(dataset)
+
+    def test_libritts_path(self):
+        dataset = LIBRITTS(Path(self.root_dir))
+        self._test_libritts(dataset)
