In gpu-workload/t5/model/handler.py:
def inference(self, input_batch):
    # Run autoregressive generation over the tokenized input batch;
    # generate() returns a tensor of generated token IDs.
    generations = self.model.generate(input_batch)
    # Decode the token IDs back into strings, dropping special tokens
    # such as padding and end-of-sequence markers.
    generations = self.tokenizer.batch_decode(generations, skip_special_tokens=True)
    return generations
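
For context, here is a minimal standalone sketch of the same generate-and-decode flow, assuming the handler wraps a Hugging Face T5ForConditionalGeneration model and its matching tokenizer. The checkpoint name and prompt below are illustrative, not taken from the handler:

# Standalone sketch of the generate/decode flow above, assuming a
# Hugging Face T5 checkpoint; "t5-small" and the prompt are illustrative.
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Tokenize a batch of prompts into padded input ID tensors.
batch = tokenizer(
    ["translate English to German: Hello, world!"],
    return_tensors="pt",
    padding=True,
)

# generate() yields token IDs; batch_decode() turns them back into text.
output_ids = model.generate(batch["input_ids"])
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))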