def inference()

in gpu-workload/t5/model/handler.py


    def inference(self, input_batch):
        # Run generation on the already-tokenized batch produced upstream
        # (e.g. by the handler's preprocess step).
        generations = self.model.generate(input_batch)
        # Decode the generated token IDs back into plain strings.
        generations = self.tokenizer.batch_decode(generations, skip_special_tokens=True)
        return generations
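
For context, below is a minimal standalone sketch of the same generate-then-decode flow using the Hugging Face transformers T5 classes. The checkpoint name, prompt, and variable names are illustrative assumptions, not taken from this handler; in the handler itself, input_batch would be the tokenized tensor prepared before inference() is called.

    from transformers import T5ForConditionalGeneration, T5Tokenizer

    # Illustrative checkpoint; the real handler loads its own model/tokenizer.
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    # Tokenize a batch of prompts into input IDs (the role played by the
    # handler's input_batch argument).
    prompts = ["translate English to German: The house is wonderful."]
    input_ids = tokenizer(prompts, return_tensors="pt", padding=True).input_ids

    # Generate output token IDs, then decode them back into strings,
    # mirroring inference() above.
    output_ids = model.generate(input_ids)
    print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))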