diff --git a/cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py b/cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py index 2ec8312f5006..c5441fd33c99 100644 --- a/cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py +++ b/cacheflow/model_executor/parallel_utils/tensor_parallel/layers.py @@ -305,7 +305,7 @@ class ColumnParallelLinear(torch.nn.Module): """ bias = self.bias if not self.skip_bias_add else None - input_parallel = copy_to_tensor_model_parallel_region(input_) + input_parallel = input_ # Matrix multiply. output_parallel = F.linear(input_parallel, self.weight, bias) if self.gather_output: