-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Update lip reading example #13647
Update lip reading example #13647
Changes from 32 commits
4ce3c9d
a2d237c
c6007ea
6cd8667
a0071d5
5f78f05
089455d
ab79109
9f10967
4aa4640
c5503d9
efe6295
a958ad9
3e8a709
4e7ba27
b8fbb26
ac509a5
ddeb117
271f3ac
2ba0b90
71d779d
a9da0e0
c003210
e2f1b42
81b0185
54afdc5
39d3378
fcf5251
22afc90
8e0d34b
7a1bffc
9bf3483
37a0759
49c0861
f2b60f5
b3804e6
7d6900d
0ad9d29
bf550fd
8a42b00
f487255
a18a96b
66c1b94
05009c8
b2f8d51
97dbcde
ed3e4c1
de1eb6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
__pycache__/ | ||
utils/*.dat | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
""" | ||
Module : this module decodes CTC output using beam search | ||
https://github.com/ThomasDelteil/HandwrittenTextRecognition_MXNet/blob/master/utils/CTCDecoder/BeamSearch.py | ||
""" | ||
|
||
from __future__ import division | ||
from __future__ import print_function | ||
import numpy as np | ||
|
||
class BeamEntry:
    """
    State kept for one single beam (one candidate labeling) at a specific time-step.
    """
    def __init__(self):
        # the labeling this beam stands for; starts out as the empty tuple
        self.labeling = ()
        # accumulated probabilities, split by how the paths end
        self.prBlank = 0  # paths ending with a blank
        self.prNonBlank = 0  # paths ending with a non-blank
        self.prTotal = 0  # blank and non-blank together
        # language-model (LM) score; 1 is the neutral starting value
        self.prText = 1
        # set once the LM has been applied, so it is not applied twice
        self.lmApplied = False
|
||
class BeamState:
    """
    All beams that exist at a specific time-step, stored in a dict keyed by labeling.
    """
    def __init__(self):
        self.entries = {}

    def norm(self):
        """
        Length-normalise the LM score of every beam in place.

        Each prText is raised to the power 1/n, where n is the length of the
        beam's labeling; an empty labeling is treated as length 1.
        """
        for beam in self.entries.values():
            length = len(beam.labeling) or 1.0
            beam.prText = beam.prText ** (1.0 / length)

    def sort(self):
        """
        Return the labelings of all beams, most probable first.

        Beams are ranked by prTotal * prText.
        """
        ranked = sorted(
            self.entries.values(),
            key=lambda beam: beam.prTotal * beam.prText,
            reverse=True,
        )
        return [beam.labeling for beam in ranked]
|
||
def applyLM(parentBeam, childBeam, classes, lm):
    """
    Score the child beam with the language model (LM), at most once per beam.

    The child's prText becomes the parent's prText multiplied by the (damped)
    bigram probability of the parent's last char followed by the child's last
    char. When the parent labeling is empty, the space character from
    `classes` is used as the first char. Nothing happens if `lm` is falsy or
    the child has already been scored.
    """
    if not lm or childBeam.lmApplied:
        return

    lmFactor = 0.01  # influence of language model

    # first char of the bigram: last char of the parent, or ' ' for an empty parent
    if parentBeam.labeling:
        firstIdx = parentBeam.labeling[-1]
    else:
        firstIdx = classes.index(' ')
    firstChar = classes[firstIdx]
    # second char of the bigram: the char the child beam ends with
    secondChar = classes[childBeam.labeling[-1]]

    # probability of seeing both chars next to each other, damped by lmFactor
    bigramProb = lm.getCharBigram(firstChar, secondChar) ** lmFactor
    childBeam.prText = parentBeam.prText * bigramProb
    childBeam.lmApplied = True
|
||
def addBeam(beamState, labeling):
    """
    Make sure `beamState` has an entry for `labeling`.

    An existing entry is left untouched; otherwise a fresh BeamEntry is stored
    under that labeling.
    """
    if labeling in beamState.entries:
        return
    beamState.entries[labeling] = BeamEntry()
|
||
def ctcBeamSearch(mat, classes, lm, k, beamWidth):
    """
    Beam search as described by the paper of Hwang et al. and the paper of Graves et al.

    mat       : 2-D array read as mat[t, c]; its first axis is iterated as
                time-steps. Column len(classes) is read as the blank
                probability and columns 0 .. mat.shape[1] - 2 as characters.
    classes   : indexable sequence mapping a label index to its character.
    lm        : language model handed to applyLM; a falsy value disables it.
    k         : number of best labelings to return.
    beamWidth : number of best beams that are kept and extended per time-step.

    Returns a list of at most k decoded strings, most probable first.
    """
    # NOTE(review): reviewers asked for a unit test of this function because it
    # is complex; the author replied that one was added.

    blankIdx = len(classes)
    numSteps, numCols = mat.shape

    # initialise beam state: a single empty beam that ends in blank with probability 1
    root = BeamEntry()
    root.prBlank = 1
    root.prTotal = 1
    last = BeamState()
    last.entries[()] = root

    # go over all time-steps
    for t in range(numSteps):
        curr = BeamState()

        # go over the best beams of the previous time-step
        for labeling in last.sort()[0:beamWidth]:
            parent = last.entries[labeling]

            # probability of paths ending with a non-blank: for a non-empty
            # beam these are the paths repeating the last char
            prNonBlank = 0
            if labeling:
                try:
                    prNonBlank = parent.prNonBlank * mat[t, labeling[-1]]
                except FloatingPointError:
                    # a floating point error in the product counts as probability 0
                    prNonBlank = 0

            # probability of paths ending with a blank
            prBlank = parent.prTotal * mat[t, blankIdx]

            # carry the unchanged labeling over to the current time-step
            addBeam(curr, labeling)
            kept = curr.entries[labeling]
            kept.labeling = labeling
            kept.prNonBlank += prNonBlank
            kept.prBlank += prBlank
            kept.prTotal += prBlank + prNonBlank
            # labeling not changed, therefore LM score is taken over as is
            kept.prText = parent.prText
            # LM already applied at previous time-step for this labeling
            kept.lmApplied = True

            # extend current beam-labeling by every character
            for c in range(numCols - 1):
                newLabeling = labeling + (c,)

                # a duplicate char at the end is only reachable through paths
                # that ended with a blank
                if labeling and labeling[-1] == c:
                    base = parent.prBlank
                else:
                    base = parent.prTotal
                prNonBlank = mat[t, c] * base

                # add the extended beam at the current time-step if needed
                addBeam(curr, newLabeling)
                extended = curr.entries[newLabeling]
                extended.labeling = newLabeling
                extended.prNonBlank += prNonBlank
                extended.prTotal += prNonBlank

                # apply LM
                applyLM(kept, extended, classes, lm)

        # set new beam state
        last = curr

    # normalise LM scores according to beam-labeling-length
    last.norm()

    # take the k most probable labelings and map their labels to chars
    return [
        ''.join(classes[idx] for idx in bestLabeling)
        for bestLabeling in last.sort()[:k]
    ]
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,115 @@ | ||||||||||||||||||||||||||||||||||
# LipNet: End-to-End Sentence-level Lipreading | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. License isn't required on readme files. @szha if you feel strongly about adding it, I'm going to modify the readme in another PR later today and I can add it then. |
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
--- | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
Gluon implementation of [LipNet: End-to-End Sentence-level Lipreading](https://arxiv.org/abs/1611.01599) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
![net_structure](asset/network_structure.png) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Requirements | ||||||||||||||||||||||||||||||||||
- Python 3.6.4 | ||||||||||||||||||||||||||||||||||
- MXNet 1.3.0 | ||||||||||||||||||||||||||||||||||
- Required disk space: 35 GB | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
pip install -r requirements.txt | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Test Environment | ||||||||||||||||||||||||||||||||||
- 4 CPU cores | ||||||||||||||||||||||||||||||||||
- 1 GPU (Tesla K80 12GB) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## The Data | ||||||||||||||||||||||||||||||||||
- The GRID audiovisual sentence corpus (http://spandh.dcs.shef.ac.uk/gridcorpus/) | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be nice to add the description from the website here: GRID is a large multitalker audiovisual sentence corpus to support joint computational-behavioral studies in speech perception. In brief, the corpus consists of high-quality audio and video (facial) recordings of 1000 sentences spoken by each of 34 talkers (18 male, 16 female). Sentences are of the form "put red at G9 now". The corpus, together with transcriptions, is freely available for research use. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||||||||||||||||||||||||||||||
- Video: (normal)(480 M each) | ||||||||||||||||||||||||||||||||||
- Align: word alignments(190 K each) | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One sentence explaining 'word alignments' would be really useful for people new to the domain. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Prepare the Data | ||||||||||||||||||||||||||||||||||
### Download the data | ||||||||||||||||||||||||||||||||||
- arguments | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add total download size here in GB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||||||||||||||||||||||||||||||
- src_path : Path for videos (default='./data/mp4s/') | ||||||||||||||||||||||||||||||||||
- align_path : Path for aligns (default='./data/') | ||||||||||||||||||||||||||||||||||
- n_process : number of processes (default=1) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
cd ./utils && python download_data.py --n_process $(nproc) | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
### Preprocess the Data: Extract the mouth images from each video and save them. | ||||||||||||||||||||||||||||||||||
- arguments | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add one or two sentences explaining what kind of preprocessing is being done (and very briefly how). And if you have a sample, maybe show before and after pictures? @aaronmarkham What was the outcome about hosting the pre-processed data? Could be really useful and save the user lots of time. Should still keeping the download and pre-processing scripts though, even if we do move to hosted pre-processed data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @thomelane I had some issues earlier, but they seem to be resolved now. The files are being sync'd to s3://mxnet-public/lipnet/ - they're not done uploading yet. It has: So... the readme could have the following helper for getting the preprocessed data.
I've tested the above command and it is busy syncing right now on another instance! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update more infomation in Readme |
||||||||||||||||||||||||||||||||||
- src_path : Path for videos (default='./data/mp4s/') | ||||||||||||||||||||||||||||||||||
- tgt_path : Path for preprocessed images (default='./data/datasets/') | ||||||||||||||||||||||||||||||||||
- n_process : number of processes (default=1) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update |
||||||||||||||||||||||||||||||||||
You can run the preprocessing with just one processor, but this will take a long time (>48 hours). To use all of the available processors, use the following command: | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be great to add pre-processing time estimates (for specified hardware that you used) with multiple processors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
cd ./utils && python preprocess_data.py --n_process $(nproc) | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Data Structure | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
The training data folder should look like : | ||||||||||||||||||||||||||||||||||
<train_data_root> | ||||||||||||||||||||||||||||||||||
|--datasets | ||||||||||||||||||||||||||||||||||
|--s1 | ||||||||||||||||||||||||||||||||||
|--bbir7s | ||||||||||||||||||||||||||||||||||
|--mouth_000.png | ||||||||||||||||||||||||||||||||||
|--mouth_001.png | ||||||||||||||||||||||||||||||||||
... | ||||||||||||||||||||||||||||||||||
|--bgaa8p | ||||||||||||||||||||||||||||||||||
|--mouth_000.png | ||||||||||||||||||||||||||||||||||
|--mouth_001.png | ||||||||||||||||||||||||||||||||||
... | ||||||||||||||||||||||||||||||||||
|--s2 | ||||||||||||||||||||||||||||||||||
... | ||||||||||||||||||||||||||||||||||
|--align | ||||||||||||||||||||||||||||||||||
|--bw1d8a.align | ||||||||||||||||||||||||||||||||||
|--bggzzs.align | ||||||||||||||||||||||||||||||||||
... | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Training | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be great to add training time estimates (for specified hardware that you used). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||||||||||||||||||||||||||||||
- arguments | ||||||||||||||||||||||||||||||||||
- batch_size : Define batch size (default=64) | ||||||||||||||||||||||||||||||||||
- epochs : Define total epochs (default=100) | ||||||||||||||||||||||||||||||||||
- image_path : Path for lip image files (default='./data/datasets/') | ||||||||||||||||||||||||||||||||||
- align_path : Path for align files (default='./data/align/') | ||||||||||||||||||||||||||||||||||
- dr_rate : Dropout rate(default=0.5) | ||||||||||||||||||||||||||||||||||
- num_gpus : Num of gpus (if num_gpus is 0, then use cpu) (default=1) | ||||||||||||||||||||||||||||||||||
- num_workers : Num of workers when generating data (default=0) | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
python main.py | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
## Results | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comment about how to generate these, either notebook or main.py. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove iypnb and Add the infer.py file |
||||||||||||||||||||||||||||||||||
[Target] | ||||||||||||||||||||||||||||||||||
['lay green with a zero again', | ||||||||||||||||||||||||||||||||||
'bin blue with r nine please', | ||||||||||||||||||||||||||||||||||
'set blue with e five again', | ||||||||||||||||||||||||||||||||||
'bin green by t seven soon', | ||||||||||||||||||||||||||||||||||
'lay red at d five now', | ||||||||||||||||||||||||||||||||||
'bin green in x eight now', | ||||||||||||||||||||||||||||||||||
'bin blue with e one now', | ||||||||||||||||||||||||||||||||||
'lay red at j nine now'] | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
[Pred] | ||||||||||||||||||||||||||||||||||
['lay green with s zero again', | ||||||||||||||||||||||||||||||||||
'bin blue with r nine please', | ||||||||||||||||||||||||||||||||||
'set blue with e five again', | ||||||||||||||||||||||||||||||||||
'bin green by t seven soon', | ||||||||||||||||||||||||||||||||||
'lay red at c five now', | ||||||||||||||||||||||||||||||||||
'bin green in x eight now', | ||||||||||||||||||||||||||||||||||
'bin blue with m one now', | ||||||||||||||||||||||||||||||||||
'lay red at j nine now'] | ||||||||||||||||||||||||||||||||||
``` | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
""" | ||
Description : Set DataSet module for lip images | ||
""" | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
import os | ||
import glob | ||
from mxnet import nd | ||
import mxnet.gluon.data.dataset as dataset | ||
from mxnet.gluon.data.vision.datasets import image | ||
from utils.align import Align | ||
|
||
class LipsDataset(dataset.Dataset):
    """
    Description : DataSet class for lip images
    """
    def __init__(self, root, align_root, flag=1, transform=None):
        """
        root       : folder that is scanned for image folders (user home is expanded)
        align_root : folder holding the '<name>.align' files
        flag       : passed through unchanged to image.imread
        transform  : optional callable applied to every loaded image
        """
        self._root = os.path.expanduser(root)
        self._align_root = align_root
        self._flag = flag
        self._transform = transform
        # stored, but not consulted by the methods of this class
        self._exts = ['.jpg', '.jpeg', '.png']
        self._list_images(self._root)

    def _list_images(self, root):
        """
        Description : generate list for lip images

        Scans every folder two levels below `root` and records a
        (sorted file list, folder name) pair in self.items for each folder
        that holds exactly 75 entries; all other folders are skipped.
        """
        self.labels = []
        self.items = []
        for folder in glob.glob(os.path.join(root, "*", "*")):
            frames = sorted(glob.glob(os.path.join(folder, "*")))
            if len(frames) != 75:
                continue
            self.items.append((frames, os.path.split(folder)[-1]))

    def align_generation(self, file_nm, padding=75):
        """
        Description : build the label array from the align file of `file_nm`

        Loads '<align_root>/<file_nm>.align' through Align and wraps the result
        of its sentence(padding) call in an nd.array.
        """
        align_file = self._align_root + '/' + file_nm + '.align'
        return nd.array(Align(align_file).sentence(padding))

    def __getitem__(self, idx):
        """
        Description : return (stacked images, label) for the item at `idx`
        """
        frame_paths, file_nm = self.items[idx]
        frames = []
        for path in frame_paths:
            frame = image.imread(path, self._flag)
            if self._transform is not None:
                frame = self._transform(frame)
            frames.append(frame)
        stacked = nd.stack(*frames)
        label = self.align_generation(file_nm)
        return stacked, label

    def __len__(self):
        """
        Description : number of items found by _list_images
        """
        return len(self.items)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we add shebang with python version? Preferably python3? thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added. Thanks!