From 507ab8a46273692c06c7d5b4a5f17bc9d50970b9 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Wed, 10 Jul 2024 09:21:59 +0000 Subject: [PATCH 01/12] add_docs --- ...llel.initialize.get_model_parallel_rank.md | 25 ++++++++++ ...nitialize.get_model_parallel_world_size.md | 24 +++++++++ ...el.initialize.initialize_model_parallel.md | 39 +++++++++++++++ ...nitialize.model_parallel_is_initialized.md | 19 +++++++ ...el_parallel.layers.ColumnParallelLinear.md | 50 +++++++++++++++++++ ...model_parallel.layers.ParallelEmbedding.md | 44 ++++++++++++++++ ...model_parallel.layers.RowParallelLinear.md | 50 +++++++++++++++++++ .../torch.distributed.is_initialized.md | 25 ++++++++++ .../others/torch.inference_mode.md | 45 +++++++++++++++++ .../others/torch.set_default_tensor_dtype.md | 40 +++++++++++++++ 10 files changed, 361 insertions(+) create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md create mode 100644 docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md create mode 100644 
docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md create mode 100644 docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md create mode 100644 docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md new file mode 100644 index 00000000000..73a5ffd17c2 --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md @@ -0,0 +1,25 @@ +## [无参数]fairscale.nn.model_parallel.initialize.get_model_parallel_rank + +### [fairscale.nn.model_parallel.initialize.get_model_parallel_rank](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L155) + +```python +fairscale.nn.model_parallel.initialize.get_model_parallel_rank() +``` + +### [paddle.distributed.get_rank](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/distributed/get_rank_cn.html) + +```python +paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank() +``` + +两者功能一致,均无参数。 + +### 转写示例 +```python +# PyTorch 写法 +fairscale.nn.model_parallel.initialize.get_model_parallel_rank() + +# Paddle 写法 +assert paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP is not None +paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank() +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md
b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md new file mode 100644 index 00000000000..0010c263fea --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md @@ -0,0 +1,24 @@ +## [无参数]fairscale.nn.model_parallel.initialize.get_model_parallel_world_size + +### [fairscale.nn.model_parallel.initialize.get_model_parallel_world_size](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L150) + +```python +fairscale.nn.model_parallel.initialize.get_model_parallel_size() +``` + +### [paddle.distributed.get_world_size](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/distributed/get_world_size_cn.html) + +```python +paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree +``` +两者功能一致,均无参数。 + +### 转写示例 +```python +# PyTorch 写法 +fairscale.nn.model_parallel.initialize.get_model_parallel_size() + +# Paddle 写法 +assert paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP is not None +paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md new file mode 100644 index 00000000000..204e95a39e4 --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md @@ -0,0 +1,39 @@ +## [组合实现]fairscale.nn.model_parallel.initialize.initialize_model_parallel + +### 
[fairscale.nn.model_parallel.initialize.initialize_model_parallel](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L41) + +```python +fairscale.nn.model_parallel.initialize.initialize_model_parallel() +``` + +对模型并行设置进行初始化; Paddle 无此 API,需要组合实现。 + +### 参数映射 + +| fairscale | PaddlePaddle | 备注 | +| --------- | ------------ | -------- | +| model_parallel_size_ | | 模型并行规模 | +| pipeline_length | | 流水线并行规模 | +| model_parallel_backend | | 模型并行通信后端 | +| pipeline_backend | | 流水线并行通信后端 | +| ddp_backend | | 数据并行通信后端| + +### 转写示例 + +```python +# Pytorch 写法 +fairscale.nn.model_parallel.initialize.initialize_model_parallel(model_parallel_size_=model_parallel_size_,pipeline_length=pipeline_length) + +# Paddle 写法 +world_size = paddle.distributed.get_world_size() +rank = paddle.distributed.get_rank() +model_parallel_size = int(min(world_size,model_parallel_size_)) +data_parallel_size = int(world_size/ (model_parallel_size * pipeline_length)) +Strategy = paddle.distributed.fleet.DistributedStrategy() +Strategy_dict = dict() +Strategy_dict["dp_degree"] = data_parallel_size +Strategy_dict["mp_degree"] = model_parallel_size +Strategy_dict["pp_degree"] = pipeline_length +Strategy.hybrid_configs = Strategy_dict +paddle.distributed.fleet.init(is_collective=True, strategy=Strategy) +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md new file mode 100644 index 00000000000..6fe1c7e170d --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md @@ -0,0 +1,19 @@ +## [组合实现]fairscale.nn.model_parallel.initialize.model_parallel_is_initialized + +### 
[fairscale.nn.model_parallel.initialize.model_parallel_is_initialized](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L119) + +```python +fairscale.nn.model_parallel.initialize.model_parallel_is_initialized() +``` + +返回模型并行初始化设置是否完成; Paddle 无此 API,需要组合实现。 + +### 转写示例 + +```python +# Pytorch 写法 +fairscale.nn.model_parallel.initialize.model_parallel_is_initialized() + +# Paddle 写法 +paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP is not None +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md new file mode 100644 index 00000000000..724505fd91f --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md @@ -0,0 +1,50 @@ +## [torch 参数更多]fairscale.nn.model_parallel.layers.ColumnParallelLinear + +### [fairscale.nn.model_parallel.layers.ColumnParallelLinear](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L218) + +```python +fairscale.nn.model_parallel.initialize.ColumnParallelLinear(in_features,out_features,bias,gather_output,init_method,stride,keep_master_weight_for_test) +``` +### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L153) + +```python +paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features,out_features,weight_attr,has_bias,gather_output,fuse_matmul_bias,mp_group,name) +``` + +两者功能大体一致,torch 的参数更多。 + +### 参数映射 + +| fairscale | PaddlePaddle | 备注 | +| --------- | 
------------ | -------- | +| in_features | in_features| 输入特征数 | +| out_features |out_features |输出特征数| +| bias |has_bias | 是否增加 bias | +| gather_output |gather_output | 是否对每个 rank 的输出 allgather | +| init_method | | 参数初始化方法| +| |weight_attr | 网络层参数属性| +| stride | | 线性层滑动步长 | +| keep_master_weight_for_test | | 返回主参数用于测试 | +| |fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | +| | mp_group| 向量并行组| +| | name| 网络层名称| + +### 转写示例 + +```python +# Pytorch 写法 +fairscale.nn.model_parallel.layers.ColumnParallelLinear(in_features=in_features, + out_features=out_features,bias=False,gather_output=False) + +# Paddle 写法 +paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features=in_features, + out_features=out_features,has_bias=False, gather_output=False) + +# Pytorch 写法 +fairscale.nn.model_parallel.layers.ColumnParallelLinear(in_features=in_features, + out_features=out_features) + +# Paddle 写法 +paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features=in_features, + out_features=out_features,has_bias=True) +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md new file mode 100644 index 00000000000..e0789ce2a3a --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md @@ -0,0 +1,44 @@ +## [torch 参数更多]fairscale.nn.model_parallel.layers.ParallelEmbedding + +### [fairscale.nn.model_parallel.layers.ParallelEmbedding](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L152) + +```python
+fairscale.nn.model_parallel.initialize.ParallelEmbedding(num_embeddings,embedding_dim,padding_idx,max_norm,norm_type,scale_grad_by_freq,sparse,init_method,keep_master_weight_for_test) +``` +### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L37) + +```python +paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings,embedding_dim,weight_attr,mp_group,name) +``` + +两者功能大体一致,但内部实现细节不一样,ParallelEmbedding 的切分方向沿着 embedding 方向,VocabParallelEmbedding 的切分方向沿着 vocab(词汇表)方向。 + +### 参数映射 + +| fairscale | PaddlePaddle | 备注 | +| --------- | ------------ | -------- | +| num_embeddings | num_embeddings|词汇表大小 | +| embedding_dim |embedding_dim |embedding 的维度大小| +| padding_idx | | 填充下标处的数据对梯度无贡献 | +| max_norm | | 范数大于 maxnorm 的数值被设置为 maxnorm| +| norm_type | | 设置 p 范数| +| sparse | | 是否为稀疏向量 | +| scale_grad_by_freq| | 是否根据 batch 内单词的频数的倒数缩放梯度 | +| init_method | | 参数初始化方法| +| keep_master_weight_for_test | | 返回主参数用于测试 | +| | mp_group| 向量并行组| +| | name| 网络层名称| + + +### 转写示例 + +```python +# Pytorch 写法 +fairscale.nn.model_parallel.initialize.ParallelEmbedding(num_embeddings=num_embeddings, + embedding_dim=embedding_dim) + +# Paddle 写法 +paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings=num_embeddings, + embedding_dim=embedding_dim) + +``` diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md new file mode 100644 index 00000000000..ef397d2ed95 --- /dev/null +++ b/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md @@ -0,0 +1,50 @@ +## [torch 
参数更多]fairscale.nn.model_parallel.layers.RowParallelLinear + +### [fairscale.nn.model_parallel.layers.RowParallelLinear](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L299) + +```python +fairscale.nn.model_parallel.layers.RowParallelLinear(in_features,out_features,bias,input_is_parallel,init_method,stride,keep_master_weight_for_test) +``` +### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L291) + +```python +paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features,out_features,weight_attr,has_bias,input_is_parallel,fuse_matmul_bias,mp_group,name) +``` + +两者功能大体一致,参数不一致。 + +### 参数映射 + +| fairscale | PaddlePaddle | 备注 | +| --------- | ------------ | -------- | +| in_features | in_features| 输入特征数 | +| out_features |out_features |输出特征数| +| bias |has_bias | 是否增加 bias | +| input_is_parallel |input_is_parallel | 输入是否在 GPUs 上进行过分割,如果是就不再分割 | +| init_method | | 参数初始化方法| +| |weight_attr | 网络层参数属性| +| stride | | 线性层滑动步长 | +| keep_master_weight_for_test | | 返回主参数用于测试 | +| |fuse_matmul_bias | 是否融合 matmul 和 bias 操作 | +| | mp_group| 向量并行组| +| | name| 网络层名称| + +### 转写示例 + +```python +# Pytorch 写法 +fairscale.nn.model_parallel.layers.RowParallelLinear(in_features=in_features, + out_features=out_features,bias=False,input_is_parallel=False) + +# Paddle 写法 +paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features=in_features, + out_features=out_features,has_bias=False, input_is_parallel=False) + +# Pytorch 写法 +fairscale.nn.model_parallel.layers.RowParallelLinear(in_features=in_features, + out_features=out_features) + +# Paddle 写法 +paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features=in_features, +
out_features=out_features,has_bias=True) +``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md new file mode 100644 index 00000000000..f3e40c8b18c --- /dev/null +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md @@ -0,0 +1,25 @@ + +### [无参数]torch.distributed.is_initialized + +### [torch.distributed.is_initialized](https://pytorch.org/docs/stable/distributed.html#torch.distributed.is_initialized) + +```python +torch.distributed.is_initialized() +``` + +### [paddle.distributed.is_initialized](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/distributed/is_initialized_cn.html#is-initialized) + +```python +paddle.distributed.is_initialized() +``` + +两者功能一致,无参数。 + +### 转写示例 +```python +# PyTorch 写法 +torch.distributed.is_initialized() + +# Paddle 写法 +paddle.distributed.is_initialized() +``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md new file mode 100644 index 00000000000..5b4611e8074 --- /dev/null +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md @@ -0,0 +1,45 @@ +## [ 仅参数名不一致 ] torch.inference_mode + +### [torch.inference_mode](https://pytorch.org/docs/stable/generated/torch.autograd.grad_mode.inference_mode.html) + +```python +torch.inference_mode(mode=True) +``` + +### [paddle.no_grad](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/no_grad_cn.html) + +```python +paddle.no_grad(func=None) +``` + +inference_mode 会额外禁用视图跟踪和版本计数器,提高推理性能,其他功能一致。此外 mode 参数额外支持 bool 类型,具体如下: + +### 参数映射 + +| PyTorch | PaddlePaddle | 备注 | +| ----------- | ------------ |
----------------------------------------------------------------------------------------- | +| mode | func | mode 为函数时,仅参数名不同,作为上下文管理器使用时,mode=True 可忽略该参数,mode=False 时,应移除之(替换为空装饰器) | + +### 转写示例 +```python +# PyTorch 写法 +@torch.inference_mode() +def doubler(x): + return x * 2 + +# Paddle 写法 +@paddle.no_grad() +def doubler(x): + return x * 2 + +# PyTorch 写法 +@torch.inference_mode(False) +def doubler(x): + return x * 2 + +# Paddle 写法 +@paddle_aux.empty_decorator +def doubler(x): + return x * 2 + +``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md new file mode 100644 index 00000000000..beb029beb9f --- /dev/null +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md @@ -0,0 +1,40 @@ +## [ 参数不一致 ] torch.set_default_tensor_type + +### [torch.set_default_tensor_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html#torch-set-default-tensor-type) + +```python +torch.set_default_tensor_dtype(d) +``` + +### [paddle.set_default_dtype](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/set_default_dtype_cn.html) + +```python +paddle.set_default_dtype(d) +``` + +两者功能一致,支持的参数类型相同,但参数用法不一致,需将 d 转换为 paddle 可识别类型,具体如下: + +### 参数映射 + +| PyTorch | PaddlePaddle | 备注 | +| ----------- | ------------ | -------------------------------------------------------------------------------------- | +| d | d | 全局默认数据类型,均支持所有浮点类型| + +### 转写示例 +```python +# pytorch +torch.set_default_tensor_type(torch.HalfTensor) +torch.set_default_tensor_type('torch.HalfTensor') +torch.set_default_tensor_type(torch.FloatTensor) +torch.set_default_tensor_type('torch.FloatTensor') +torch.set_default_tensor_type(torch.DoubleTensor) +torch.set_default_tensor_type('torch.DoubleTensor') + +# paddle +paddle.set_default_dtype('float16') 
+paddle.set_default_dtype('float16') +paddle.set_default_dtype('float32') +paddle.set_default_dtype('float32') +paddle.set_default_dtype('float64') +paddle.set_default_dtype('float64') +``` From 54b800e35088b8bd3810dd749767af2fa8a8f302 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Wed, 10 Jul 2024 13:28:41 +0000 Subject: [PATCH 02/12] fix --- .../distributed/torch.distributed.is_initialized.md | 3 +-- ...efault_tensor_dtype.md => torch.set_default_tensor_type.md} | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) rename docs/guides/model_convert/convert_from_pytorch/api_difference/others/{torch.set_default_tensor_dtype.md => torch.set_default_tensor_type.md} (95%) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md index f3e40c8b18c..2a69704e9c2 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/distributed/torch.distributed.is_initialized.md @@ -1,5 +1,4 @@ - -### [无参数]torch.distributed.is_initialized +## [无参数]torch.distributed.is_initialized ### [torch.distributed.is_initialized](https://pytorch.org/docs/stable/distributed.html#torch.distributed.is_initialized) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md similarity index 95% rename from docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index beb029beb9f..6d363b606ec 100644 --- 
a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_dtype.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -1,4 +1,4 @@ -## [ 参数不一致 ] torch.set_default_tensor_type +## [ 输入参数用法不一致 ] torch.set_default_tensor_type ### [torch.set_default_tensor_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html#torch-set-default-tensor-type) From 77ef98f49509b35f88f611303c67639b2eb7409a Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Fri, 12 Jul 2024 03:12:37 +0000 Subject: [PATCH 03/12] fix_all --- .../others/torch.set_default_tensor_type.md | 2 +- ...llel.initialize.get_model_parallel_rank.md | 2 +- ...nitialize.get_model_parallel_world_size.md | 2 +- ...el.initialize.initialize_model_parallel.md | 2 +- ...nitialize.model_parallel_is_initialized.md | 2 +- ...el_parallel.layers.ColumnParallelLinear.md | 2 +- ...model_parallel.layers.ParallelEmbedding.md | 2 +- ...model_parallel.layers.RowParallelLinear.md | 4 +- .../apply_reference_from_api_difference.py | 41 +++++++++++++++++++ .../pytorch_api_mapping_cn.md | 13 ++++++ .../validate_mapping_in_api_difference.py | 10 +++++ 11 files changed, 73 insertions(+), 9 deletions(-) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md (77%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md (78%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md (94%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => 
convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md (86%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md (98%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.layers.ParallelEmbedding.md (98%) rename docs/guides/model_convert/{convert_from_fairscale/api_difference/nn => convert_from_pytorch/api_difference_third_party/fairscale}/fairscale.nn.model_parallel.layers.RowParallelLinear.md (97%) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index 6d363b606ec..ef749681ede 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -3,7 +3,7 @@ ### [torch.set_default_tensor_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html#torch-set-default-tensor-type) ```python -torch.set_default_tensor_dtype(d) +torch.set_default_tensor_type(d) ``` ### [paddle.set_default_dtype](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/set_default_dtype_cn.html) diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md similarity index 77% rename from 
docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md index 73a5ffd17c2..9ab7934b701 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md @@ -6,7 +6,7 @@ fairscale.nn.model_parallel.initialize.get_model_parallel_rank() ``` -### [paddle.distributed.get_rank](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/distributed/get_rank_cn.html) +### [paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank](https://github.com/PaddlePaddle/Paddle/blob/ddac1b431483ddc0f1ee600e799aa31fc0a75961/python/paddle/distributed/fleet/base/topology.py#L463) ```python paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank() diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md similarity index 78% rename from docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md index 0010c263fea..9db0bd92fd6 100644 --- 
a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md @@ -6,7 +6,7 @@ fairscale.nn.model_parallel.initialize.get_model_parallel_size() ``` -### [paddle.distributed.get_world_size](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/distributed/get_world_size_cn.html) +### [paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree](https://github.com/PaddlePaddle/Paddle/blob/ddac1b431483ddc0f1ee600e799aa31fc0a75961/python/paddle/distributed/fleet/base/topology.py#L185) ```python paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md similarity index 94% rename from docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md index 204e95a39e4..7bc16f05d2d 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md @@ -1,4 +1,4 @@ -## [组合实现]fairscale.nn.model_parallel.initialize.initialize_model_parallel +## [组合替代实现]fairscale.nn.model_parallel.initialize.initialize_model_parallel ### 
[fairscale.nn.model_parallel.initialize.initialize_model_parallel](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L41) diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md similarity index 86% rename from docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md index 6fe1c7e170d..cdb8b3fca81 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md @@ -1,4 +1,4 @@ -## [组合实现]fairscale.nn.model_parallel.initialize.model_parallel_is_initialized +## [组合替代实现]fairscale.nn.model_parallel.initialize.model_parallel_is_initialized ### [fairscale.nn.model_parallel.initialize.model_parallel_is_initialized](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L119) diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md similarity index 98% rename from 
docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md index 724505fd91f..462a5987dc0 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md @@ -26,7 +26,7 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear( | stride | | 线性层滑动步长 | | keep_master_weight_for_test | | 返回主参数用于测试 | | |fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | -| | mp_group| 向量并行组| +| | mp_group| 模型并行组| | | name| 网络层名称| ### 转写示例 diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md similarity index 98% rename from docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md index e0789ce2a3a..8627a73b9b4 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.ParallelEmbedding.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md @@ -26,7 +26,7 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbeddin | scale_grad_by_freq| | 是否根据 batch 内单词的频数的倒数缩放梯度 | | init_method | | 参数初始化方法| | keep_master_weight_for_test | | 返回主参数用于测试 | 
-| | mp_group| 向量并行组| +| | mp_group| 模型并行组| | | name| 网络层名称| diff --git a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md similarity index 97% rename from docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md index ef397d2ed95..f6ccd4b4045 100644 --- a/docs/guides/model_convert/convert_from_fairscale/api_difference/nn/fairscale.nn.model_parallel.layers.RowParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md @@ -26,8 +26,8 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_ | stride | | 线性层滑动步长 | | keep_master_weight_for_test | | 返回主参数用于测试 | | |fuse_matmul_bias | 是否融合 matmul 和 bias 操作 | -| | mp_group| 向量并行组| -| | name| 网络层名称| +| | mp_group| 模型并行组 | +| | name| 网络层名称 | ### 转写示例 diff --git a/docs/guides/model_convert/convert_from_pytorch/apply_reference_from_api_difference.py b/docs/guides/model_convert/convert_from_pytorch/apply_reference_from_api_difference.py index 6c34eda10ef..a2b616cd17f 100644 --- a/docs/guides/model_convert/convert_from_pytorch/apply_reference_from_api_difference.py +++ b/docs/guides/model_convert/convert_from_pytorch/apply_reference_from_api_difference.py @@ -78,6 +78,9 @@ def mapping_type_to_description(mapping_type): NOT_IMPLEMENTED_PATTERN = re.compile( r"^\| *NOT-IMPLEMENTED-ITEM\( *(?P[^,]+) *, *(?P.+) *\) *\|$" ) +MANUAL_MAINTAINING_PATTERN = re.compile( + r"^\| *MANUAL_MAINTAINING-ITEM\(*(?P[^,]+) *,*(?P[^,]+) *, *(?P[^,]+) *,*(?P[^,]+) *, *(?P[^,]+) *, *(?P.+) *\) 
*\|$" +) DOCS_REPO_BASEURL = "https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/" @@ -158,6 +161,7 @@ def apply_reference_to_row_ex(line, metadata_dict, context, line_idx): reference_table_match = REFERENCE_TABLE_PATTERN.match(line) alias_match = ALIAS_PATTERN.match(line) not_implemented_match = NOT_IMPLEMENTED_PATTERN.match(line) + manual_maintaining_match = MANUAL_MAINTAINING_PATTERN.match(line) row_idx_s = str(context["table_row_idx"]) @@ -208,7 +212,44 @@ def apply_reference_to_row_ex(line, metadata_dict, context, line_idx): mapping_column, mapping_url_column, ] + output = "| " + " | ".join(content) + " |\n" + return [output] + elif manual_maintaining_match: + torch_api = ( + manual_maintaining_match["torch_api"].strip("`").replace(r"\_", "_") + ) + torch_url = ( + manual_maintaining_match["torch_url"].strip("`").replace(r"\_", "_") + ) + paddle_api = ( + manual_maintaining_match["paddle_api"] + .strip("`") + .replace(r"\_", "_") + ) + paddle_url = ( + manual_maintaining_match["paddle_url"] + .strip("`") + .replace(r"\_", "_") + ) + mapping_column = ( + manual_maintaining_match["mapping_type_desc"] + .strip() + .replace(r"\_", "_") + ) + diff_page_url = ( + manual_maintaining_match["diff_url"].strip("`").replace(r"\_", "_") + ) + mapping_url_column = f"[详细对比]({diff_page_url})" + torch_api_column = f"[`{torch_api}`]({torch_url})" + paddle_api_column = f"[`{paddle_api}`]({paddle_url})" + content = [ + row_idx_s, + torch_api_column, + paddle_api_column, + mapping_column, + mapping_url_column, + ] output = "| " + " | ".join(content) + " |\n" return [output] else: diff --git a/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md b/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md index 65b47bb117a..66b44083821 100644 --- a/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md +++ b/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md @@ 
-29,6 +29,7 @@ | [torch.optim.XX](#id22) | 主要为`torch.optim.XX`类 API | | [torch.sparse.XX](#id12) | 主要为`torch.sparse.XX`类 API | | [其他](#id13) | 其他 API | +| [fairscale.xx](#id23) | 第三方库 fairscale API | ## torch.XX API 映射列表 @@ -415,4 +416,16 @@ | NOT-IMPLEMENTED-ITEM(`torch.utils.cpp_extension.include_paths`, https://pytorch.org/docs/stable/cpp_extension.html#torch.utils.cpp_extension.include_paths) | | NOT-IMPLEMENTED-ITEM(`torch.utils.cpp_extension.load_inline`, https://pytorch.org/docs/stable/cpp_extension.html#torch.utils.cpp_extension.load_inline) | + + ## fairscale.XX API 映射列表 +| 序号 | Pytorch 最新 release | Paddle develop | 映射关系分类 | 备注 | +| ----- | ----------- | ----------------- | ----------- | ------- | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.initialize.get_model_parallel_rank`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L155, `paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank`, https://github.com/PaddlePaddle/Paddle/blob/ddac1b431483ddc0f1ee600e799aa31fc0a75961/python/paddle/distributed/fleet/base/topology.py#L463, 无参数 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.initialize.get_model_parallel_world_size`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L150, `paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree`,https://github.com/PaddlePaddle/Paddle/blob/ddac1b431483ddc0f1ee600e799aa31fc0a75961/python/paddle/distributed/fleet/base/topology.py#L185, 无参数 , 
https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.initialize.initialize_model_parallel`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L41, ` `, , 组合替代实现 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.initialize.model_parallel_is_initialized`, https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L119,` `, , 组合替代实现 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.model_parallel_is_initialized.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.layers.ColumnParallelLinear`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L218, `paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear`,https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L153, torch 参数更多 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.layers.ParallelEmbedding`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L152, 
`paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding`,https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L37, torch 参数更多 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md) | +|MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.layers.RowParallelLinear`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L299, `paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear`,https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L291,torch 参数更多 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md) | + ***持续更新...*** diff --git a/docs/guides/model_convert/convert_from_pytorch/validate_mapping_in_api_difference.py b/docs/guides/model_convert/convert_from_pytorch/validate_mapping_in_api_difference.py index 16bc1308308..38cfc713bbc 100644 --- a/docs/guides/model_convert/convert_from_pytorch/validate_mapping_in_api_difference.py +++ b/docs/guides/model_convert/convert_from_pytorch/validate_mapping_in_api_difference.py @@ -407,6 +407,7 @@ def validate_mapping_table_macro_row(columns, row_idx, line_idx): "REFERENCE-MAPPING-ITEM", "NOT-IMPLEMENTED-ITEM", "REFERENCE-MAPPING-TABLE", + "MANUAL_MAINTAINING_PATTERN", ]: print(f"Unknown macro type: {macro_type} at line {line_idx}.") return False @@ -674,6 +675,11 @@ def auto_fill_index_from_api_diff(basedir, meta_dict) -> None: "NOT-IMPLEMENTED-ITEM" ): pass + # if before is MANUAL_MAINTAINING_PATTERN, replace + elif target[api_type][torch_api].startswith( + 
"MANUAL_MAINTAINING_PATTERN" + ): + pass # if before is X2Paddle, skip elif ( "https://github.com/PaddlePaddle/X2Paddle" @@ -758,10 +764,14 @@ def auto_fill_index_from_api_diff(basedir, meta_dict) -> None: for api, ref in od_apis.items(): if ref.startswith("NOT-IMPLEMENTED-ITEM"): f.write(f"| {ref} |\n") + for api, ref in od_apis.items(): + if ref.startswith("MANUAL_MAINTAINING_PATTERN"): + f.write(f"| {ref} |\n") for api, ref in od_apis.items(): if not ( ref.startswith("REFERENCE-MAPPING-ITEM") or ref.startswith("NOT-IMPLEMENTED-ITEM") + or ref.startswith("MANUAL_MAINTAINING_PATTERN") ): f.write(f"| {ref} |\n") f.write("\n") From f1b9ff10d0d8cb95563c9736814c1e5614c2a534 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Fri, 12 Jul 2024 07:28:49 +0000 Subject: [PATCH 04/12] fix_typo --- .../convert_from_pytorch/pytorch_api_mapping_cn.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md b/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md index 66b44083821..eacd5212df4 100644 --- a/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md +++ b/docs/guides/model_convert/convert_from_pytorch/pytorch_api_mapping_cn.md @@ -28,7 +28,7 @@ | [torch.profiler.XX](#id21) | 主要为`torch.profiler.XX`类 API | | [torch.optim.XX](#id22) | 主要为`torch.optim.XX`类 API | | [torch.sparse.XX](#id12) | 主要为`torch.sparse.XX`类 API | -| [其他](#id13) | 其他 API | +| [torch 其他](#id13) | PyTorch 其他 API | | [fairscale.xx](#id23) | 第三方库 fairscale API | ## torch.XX API 映射列表 @@ -202,9 +202,9 @@ ***持续更新...*** -## 其他类 API 映射列表 +## PyTorch 其他类 API 映射列表 -梳理了其他类 API 的 PyTorch-PaddlePaddle API 映射列表。 +梳理了 PyTorch 其他类 API 的 PyTorch-PaddlePaddle API 映射列表。 | 序号 | Pytorch 最新 release | Paddle develop | 映射关系分类 | 备注 | | ----- | ----------- | ----------------- | ----------- | ------- | @@ -418,6 +418,7 @@ ## fairscale.XX API 映射列表 + | 序号 | Pytorch 最新 release | Paddle develop | 映射关系分类 | 备注 | | 
----- | ----------- | ----------------- | ----------- | ------- | |MANUAL_MAINTAINING-ITEM(`fairscale.nn.model_parallel.initialize.get_model_parallel_rank`,https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/initialize.py#L155, `paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank`, https://github.com/PaddlePaddle/Paddle/blob/ddac1b431483ddc0f1ee600e799aa31fc0a75961/python/paddle/distributed/fleet/base/topology.py#L463, 无参数 , https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md) | From d99e2751ae412e4003474ef334c488e0466bbafb Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Tue, 16 Jul 2024 08:39:52 +0000 Subject: [PATCH 05/12] fix --- ...scale.nn.model_parallel.initialize.get_model_parallel_rank.md | 1 - ...nn.model_parallel.initialize.get_model_parallel_world_size.md | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md index 9ab7934b701..ea93872a806 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md @@ -20,6 +20,5 @@ paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel fairscale.nn.model_parallel.initialize.get_model_parallel_size() # Paddle 写法 -assert paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP is not None 
paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank() ``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md index 9db0bd92fd6..1fdfa6869b7 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md @@ -19,6 +19,5 @@ paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree fairscale.nn.model_parallel.initialize.get_model_parallel_size() # Paddle 写法 -assert paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP is not None paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree ``` From 4e6f2682ce95d55e5ba6c044109bbcaad4c07ab6 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Wed, 17 Jul 2024 14:45:02 +0000 Subject: [PATCH 06/12] fix --- .../others/torch.inference_mode.md | 3 +- .../others/torch.set_default_tensor_type.md | 4 +- ...llel.initialize.get_model_parallel_rank.md | 9 ---- ...nitialize.get_model_parallel_world_size.md | 9 ---- ...el.initialize.initialize_model_parallel.md | 14 ++--- ...el_parallel.layers.ColumnParallelLinear.md | 52 ++++++------------- ...model_parallel.layers.ParallelEmbedding.md | 46 ++++++---------- ...model_parallel.layers.RowParallelLinear.md | 52 ++++++------------- 8 files changed, 58 insertions(+), 131 deletions(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md index 
5b4611e8074..d3a680871ff 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md @@ -18,7 +18,7 @@ inference_mode 会额外禁用视图跟踪和版本计数器,提高推理性 | PyTorch | PaddlePaddle | 备注 | | ----------- | ------------ | ----------------------------------------------------------------------------------------- | -| mode | func | mode 为函数时,仅参数名不同,作为上下文管理器使用时,mode=True 可忽略该参数,mode=False 时,应移除之(替换为空装饰器) | +| mode | func | mmode 为函数时,仅参数名不同;mode 为 bool 时,作为上下文管理器使用,其中 mode=True 可忽略该参数,mode=False 时,直接删除该代码 | ### 转写示例 ```python @@ -38,7 +38,6 @@ def doubler(x): return x * 2 # Paddle 写法 -@paddle_aux.empty_decorator def doubler(x): return x * 2 diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index ef749681ede..ba4c0958898 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -1,4 +1,4 @@ -## [ 输入参数用法不一致 ] torch.set_default_tensor_type +## [ 输入参数类型不一致 ] torch.set_default_tensor_type ### [torch.set_default_tensor_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html#torch-set-default-tensor-type) @@ -12,7 +12,7 @@ torch.set_default_tensor_type(d) paddle.set_default_dtype(d) ``` -两者功能一致,支持的参数类型相同,但参数用法不一致,需将 d 转换为 paddle 可识别类型,具体如下: +两者功能一致,支持的参数类型相同,但输入参数类型不一致,需将 d 转换为 paddle 可识别类型,具体如下: ### 参数映射 diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md 
b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md index ea93872a806..b982129fcae 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_rank.md @@ -13,12 +13,3 @@ paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel ``` 两者功能一致,均无参数。 - -### 转写示例 -```python -# PyTorch 写法 -fairscale.nn.model_parallel.initialize.get_model_parallel_size() - -# Paddle 写法 -paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP.get_model_parallel_rank() -``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md index 1fdfa6869b7..9f0ebf461a0 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.get_model_parallel_world_size.md @@ -12,12 +12,3 @@ fairscale.nn.model_parallel.initialize.get_model_parallel_size() paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree ``` 两者功能一致,均无参数。 - -### 转写示例 -```python -# PyTorch 写法 -fairscale.nn.model_parallel.initialize.get_model_parallel_size() - -# Paddle 写法 -paddle.distributed.fleet.base.topology._HYBRID_PARALLEL_GROUP._mp_degree -``` diff --git 
a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md index 7bc16f05d2d..c8e307ed659 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md @@ -10,13 +10,13 @@ fairscale.nn.model_parallel.initialize.initialize_model_parallel() ### 参数映射 -| fairscale | PaddlePaddle | 备注 | -| --------- | ------------ | -------- | -| model_parallel_size_ | | 模型并行规模 | -| pipeline_length | | 流水线并行规模 | -| model_parallel_backend | | 模型并行通信后端 | -| pipeline_backend | | 流水线并行通信后端 | -| ddp_backend | | 数据并行通信后端| +| fairscale | PaddlePaddle | 备注 | +| ---------------------- | ------------ |--------------- | +| model_parallel_size_ | - | 模型并行规模 | +| pipeline_length | - | 流水线并行规模 | +| model_parallel_backend | - | 模型并行通信后端 | +| pipeline_backend | - | 流水线并行通信后端 | +| ddp_backend | - | 数据并行通信后端 | ### 转写示例 diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md index 462a5987dc0..244d0b01626 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md @@ -3,48 +3,28 @@ ### 
[fairscale.nn.model_parallel.layers.ColumnParallelLinear](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L218) ```python -fairscale.nn.model_parallel.initialize.ColumnParallelLinear(in_features,out_features,bias,gather_output,init_method,stride,keep_master_weight_for_test) +fairscale.nn.model_parallel.layers.ColumnParallelLinear(in_features: int, out_features: int, bias: bool = True, gather_output: bool = True, init_method: Callable[[torch.Tensor], torch.Tensor] = init.xavier_normal_, stride: int = 1, keep_master_weight_for_test: bool = False) ``` ### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L153) ```python -paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features,out_features,weight_attr,has_bias,gather_output,fuse_matmul_bias,mp_group,name) +paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features, out_features, weight_attr=None, has_bias=None, gather_output=True, fuse_matmul_bias=False, mp_group=None, name=None) ``` -两者功能大体一致,torch 的参数更多。 +PyTorch 相比 Paddle 支持更多其他参数,具体如下: ### 参数映射 -| fairscale | PaddlePaddle | 备注 | -| --------- | ------------ | -------- | -| in_features | in_features| 输入特征数 | -| out_features |out_features |输出特征数| -| bias |has_bias | 是否增加 bias | -| gather_output |gather_output | 是否对每个 rank 的输出 allgather | -| init_method | | 参数初始化方法| -| |weight_attr | 网络层参数属性| -| stride | | 线性层滑动步长 | -| keep_master_weight_for_test | | 返回主参数用于测试 | -| |fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | -| | mp_group| 模型并行组| -| | name| 网络层名称| - -### 转写示例 - -```python -# Pytorch 写法 -fairscale.nn.model_parallel.initialize.ColumnParallelLinear(in_features=in_features, - out_features=out_features,bias=False,gather_out=False) - -# Paddle 写法 
-paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features=in_features, - out_features=in_features,has_bias=False, gather_output=False) - -# Pytorch 写法 -fairscale.nn.model_parallel.initialize.ColumnParallelLinear(in_features=in_features, - out_features=out_features) - -# Paddle 写法 -paddle.distributed.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear(in_features=in_features, - out_features=in_features,has_bias=True) -``` +| fairscale | PaddlePaddle | 备注 | +| ----------------------------| ---------------- | -------- | +| in_features | in_features | 输入特征数 | +| out_features | out_features | 输出特征数 | +| bias | has_bias | 是否增加 bias | +| gather_output | gather_output | 是否对每个 rank 的输出 allgather | +| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| - | weight_attr | 网络层参数属性 | +| stride | - | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| - | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | +| - | mp_group | 模型并行组 | +| - | name | 网络层名称 | diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md index 8627a73b9b4..32951d69a96 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md @@ -3,42 +3,28 @@ ### [fairscale.nn.model_parallel.layers.ParallelEmbedding](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L152) ```python 
-fairscale.nn.model_parallel.initialize.ParallelEmbedding(num_embeddings,embedding_dim,padding_idx,max_norm,norm_type,scale_grad_by_freq,sparse,init_method,keep_master_weight_for_test) +fairscale.nn.model_parallel.initialize.ParallelEmbedding(num_embeddings: int, embedding_dim: int ,padding_idx: Optional[int] = None, max_norm: Optional[float] = None, norm_type: float = 2.0, scale_grad_by_freq: bool = False, sparse: bool = False, init_method: Callable[[torch.Tensor], torch.Tensor] = init.xavier_normal_, keep_master_weight_for_test: bool = False) ``` ### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L37) ```python -paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings,embedding_dim,weight_attr,mp_group,name) +paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings, embedding_dim, weight_attr=None, mp_group=None, name=None) ``` -两者功能大体一致,但内部实现细节不一样,ParallelEmbedding 的切分方向沿着 embedding 方向,VocabParallelEmbedding 的切分方向沿着 vocab(词汇表)方向。 +两者功能大体一致,但内部实现细节不一样,ParallelEmbedding 的切分方向沿着 embedding 方向,VocabParallelEmbedding 的切分方向沿着 vocab(词汇表)方向,故在多卡训练时,load 参数时需手动修改以匹配参数切分方式的不同。 ### 参数映射 -| fairscale | PaddlePaddle | 备注 | -| --------- | ------------ | -------- | -| num_embeddings | num_embeddings|词汇表大小 | -| embedding_dim |embedding_dim |embedding 的维度大小| -| padding_idx | | 填充下标处的数据对梯度无贡献 | -| max_norm | | 范数大于 maxnorm 的数值被设置为 maxnorm| -| norm_type | | 设置 p 范数| -| sparse | | 是否为稀疏向量 | -| scale_grad_by_freq| | 是否根据 batch 内单词的频数的倒数缩放梯度 | -| init_method | | 参数初始化方法| -| keep_master_weight_for_test | | 返回主参数用于测试 | -| | mp_group| 模型并行组| -| | name| 网络层名称| - - -### 转写示例 - -```python -# Pytorch 写法 -fairscale.nn.model_parallel.initialize.ParallelEmbedding(num_embeddings=num_embeddings, - embedding_dim=embedding_dim) - -# Paddle 写法 
-paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbedding(num_embeddings=num_embeddings, - embedding_dim=embedding_dim) - -``` +| fairscale | PaddlePaddle | 备注 | +| ---------------------------- | -------------- | -------- | +| num_embeddings | num_embeddings | 词汇表大小 | +| embedding_dim | embedding_dim | embedding 的维度大小| +| padding_idx | - | 填充数值,Paddle 无此参数,暂无转写方式 | +| max_norm | - | 范数大于 maxnorm 的数值被设置为 maxnorm | +| norm_type | - | 设置 p 范数,Paddle 无此参数,暂无转写方式 | +| sparse | - | 是否为稀疏向量,Paddle 无此参数,暂无转写方式 | +| scale_grad_by_freq | - | 是否根据 batch 内单词的频数的倒数缩放梯度,Paddle 无此参数,暂无转写方式| +| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| - | mp_group | 模型并行组 | +| - | name | 网络层名称 | diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md index f6ccd4b4045..46cf6471bb3 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md @@ -3,48 +3,28 @@ ### [fairscale.nn.model_parallel.layers.RowParallelLinear](https://github.com/facebookresearch/fairscale/blob/164cc0f3170b4a3951dd84dda29c3e1504ac4d6e/fairscale/nn/model_parallel/layers.py#L299) ```python -fairscale.nn.model_parallel.initialize.RowParallelLinear(in_features,out_features,bias,input_is_parallel,init_method,stride,keep_master_weight_for_test) +fairscale.nn.model_parallel.initialize.RowParallelLinear(in_features: int, out_features: int, bias: bool = True, input_is_parallel: bool = False, init_method: Callable[[torch.Tensor], 
torch.Tensor] = init.xavier_normal_, stride: int = 1, keep_master_weight_for_test: bool = False) ``` ### [paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear](https://github.com/PaddlePaddle/Paddle/blob/016766cc89fabc10181453ce70b701dd8ed019f6/python/paddle/distributed/fleet/layers/mpu/mp_layers.py#L291) ```python -paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features,out_features,weight_attr,has_bias,input_is_parallel,fuse_matmul_bias,mp_group,name) +paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features, out_features, weight_attr=None, has_bias=True, input_is_parallel=False, fuse_matmul_bias=False, mp_group=None, name=None) ``` -两者功能大体一致,参数不一致。 +PyTorch 相比 Paddle 支持更多其他参数,具体如下: ### 参数映射 -| fairscale | PaddlePaddle | 备注 | -| --------- | ------------ | -------- | -| in_features | in_features| 输入特征数 | -| out_features |out_features |输出特征数| -| bias |has_bias | 是否增加 bias | -| input_is_parallel |input_is_parallel | 输入是否在 GPUs 上进行过分割,如果是就不再分割 | -| init_method | | 参数初始化方法| -| |weight_attr | 网络层参数属性| -| stride | | 线性层滑动步长 | -| keep_master_weight_for_test | | 返回主参数用于测试 | -| |fuse_matmul_bias | 是否融合 matmul 和 bias 操作 | -| | mp_group| 模型并行组 | -| | name| 网络层名称 | - -### 转写示例 - -```python -# Pytorch 写法 -fairscale.nn.model_parallel.initialize.RowParallelLinear(in_features=in_features, - out_features=out_features,bias=False,input_is_parallel=False) - -# Paddle 写法 -paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features=in_features, - out_features=in_features,has_bias=False, input_is_parallel=False) - -# Pytorch 写法 -fairscale.nn.model_parallel.initialize.RowParallelLinear(in_features=in_features, - out_features=out_features) - -# Paddle 写法 -paddle.distributed.meta_parallel.parallel_layers.mp_layers.RowParallelLinear(in_features=in_features, - out_features=in_features,has_bias=True) -``` +| fairscale | PaddlePaddle | 备注 | +| ----------------------------| 
---------------- | -------- | +| in_features | in_features | 输入特征数 | +| out_features | out_features | 输出特征数 | +| bias | has_bias | 是否增加 bias | +| input_is_parallel | input_is_parallel| 输入是否在 GPUs 上进行过分割,如果是就不再分割 | +| init_method |- | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +|- | weight_attr | 网络层参数属性| +| stride |- | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +| keep_master_weight_for_test |- | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | +|- | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | +|- | mp_group | 模型并行组| +|- | name | 网络层名称| From 5a203a691923cd81df40c0cffd00ff6c03e2d858 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 02:38:16 +0000 Subject: [PATCH 07/12] fix --- .../fairscale.nn.model_parallel.layers.ParallelEmbedding.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md index 32951d69a96..4fe9e102031 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md @@ -20,7 +20,7 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbeddin | num_embeddings | num_embeddings | 词汇表大小 | | embedding_dim | embedding_dim | embedding 的维度大小| | padding_idx | - | 填充数值,Paddle 无此参数,暂无转写方式 | -| max_norm | - | 范数大于 maxnorm 的数值被设置为 maxnorm | +| max_norm | - | 范数大于 maxnorm 的数值被设置为 maxnorm, Paddle 无此参数,暂无转写方式 | | norm_type | - | 设置 p 范数,Paddle 无此参数,暂无转写方式 | | sparse | - | 是否为稀疏向量,Paddle 无此参数,暂无转写方式 | | scale_grad_by_freq | - | 是否根据 batch 内单词的频数的倒数缩放梯度,Paddle 无此参数,暂无转写方式| From 
841a1faf177e746d9a52d2a15b1b74872aef389d Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 07:34:43 +0000 Subject: [PATCH 08/12] fix_typo --- .../others/torch.inference_mode.md | 4 ++-- .../others/torch.set_default_tensor_type.md | 13 ++++++----- ...el.initialize.initialize_model_parallel.md | 10 --------- ...model_parallel.layers.ParallelEmbedding.md | 22 +++++++++---------- ...model_parallel.layers.RowParallelLinear.md | 22 +++++++++---------- 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md index d3a680871ff..d2b54b1262c 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.inference_mode.md @@ -18,9 +18,10 @@ inference_mode 会额外禁用视图跟踪和版本计数器,提高推理性 | PyTorch | PaddlePaddle | 备注 | | ----------- | ------------ | ----------------------------------------------------------------------------------------- | -| mode | func | mmode 为函数时,仅参数名不同;mode 为 bool 时,作为上下文管理器使用,其中 mode=True 可忽略该参数,mode=False 时,直接删除该代码 | +| mode | func | mode 为函数时,仅参数名不同;mode 为 bool 时,作为上下文管理器使用,其中 mode=True 可忽略该参数,mode=False 时,直接删除该代码。 | ### 转写示例 +#### mode : 函数或 bool 值 ```python # PyTorch 写法 @torch.inference_mode() @@ -40,5 +41,4 @@ def doubler(x): # Paddle 写法 def doubler(x): return x * 2 - ``` diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index ba4c0958898..216edc8a8cc 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md +++ 
b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -12,17 +12,18 @@ torch.set_default_tensor_type(d) paddle.set_default_dtype(d) ``` -两者功能一致,支持的参数类型相同,但输入参数类型不一致,需将 d 转换为 paddle 可识别类型,具体如下: +两者功能一致但但输入参数类型不一致,torch 支持浮点张量类型或其名称,paddle 仅支持 dtype,需要转写,具体如下: ### 参数映射 -| PyTorch | PaddlePaddle | 备注 | -| ----------- | ------------ | -------------------------------------------------------------------------------------- | -| d | d | 全局默认数据类型,均支持所有浮点类型| +| PyTorch | PaddlePaddle | 备注 | +| ----------- | ------------ | ------------------- | +| d | d | 浮点张量类型或其名称. | ### 转写示例 +#### d: 浮点张量类型或其名称 ```python -# pytorch +# pytorch 写法 torch.set_default_tensor_type(torch.HalfTensor) torch.set_default_tensor_type('torch.HalfTensor') torch.set_default_tensor_type(torch.FloatTensor) @@ -30,7 +31,7 @@ torch.set_default_tensor_type('torch.FloatTensor') torch.set_default_tensor_type(torch.DoubleTensor) torch.set_default_tensor_type('torch.DoubleTensor') -# paddle +# paddle 写法 paddle.set_default_dtype('float16') paddle.set_default_dtype('float16') paddle.set_default_dtype('float32') diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md index c8e307ed659..26d5a111f53 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.initialize.initialize_model_parallel.md @@ -8,16 +8,6 @@ fairscale.nn.model_parallel.initialize.initialize_model_parallel() 对模型并行设置进行初始化; Paddle 无此 API,需要组合实现。 -### 参数映射 - -| fairscale | PaddlePaddle | 备注 | -| ---------------------- | 
------------ |--------------- | -| model_parallel_size_ | - | 模型并行规模 | -| pipeline_length | - | 流水线并行规模 | -| model_parallel_backend | - | 模型并行通信后端 | -| pipeline_backend | - | 流水线并行通信后端 | -| ddp_backend | - | 数据并行通信后端 | - ### 转写示例 ```python diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md index 4fe9e102031..efc3fb13ef3 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ParallelEmbedding.md @@ -17,14 +17,14 @@ paddle.distributed.meta_parallel.parallel_layers.mp_layers.VocabParallelEmbeddin | fairscale | PaddlePaddle | 备注 | | ---------------------------- | -------------- | -------- | -| num_embeddings | num_embeddings | 词汇表大小 | -| embedding_dim | embedding_dim | embedding 的维度大小| -| padding_idx | - | 填充数值,Paddle 无此参数,暂无转写方式 | -| max_norm | - | 范数大于 maxnorm 的数值被设置为 maxnorm, Paddle 无此参数,暂无转写方式 | -| norm_type | - | 设置 p 范数,Paddle 无此参数,暂无转写方式 | -| sparse | - | 是否为稀疏向量,Paddle 无此参数,暂无转写方式 | -| scale_grad_by_freq | - | 是否根据 batch 内单词的频数的倒数缩放梯度,Paddle 无此参数,暂无转写方式| -| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| - | mp_group | 模型并行组 | -| - | name | 网络层名称 | +| num_embeddings | num_embeddings | 词汇表大小。 | +| embedding_dim | embedding_dim | embedding 的维度大小。| +| padding_idx | - | 填充数值,Paddle 无此参数,暂无转写方式。 | +| max_norm | - | 范数大于 max_norm 的数值被设置为 max_norm,Paddle 无此参数,暂无转写方式。 | +| norm_type | - | 设置 p 范数,Paddle 无此参数,暂无转写方式。 | +| sparse | - | 是否为稀疏向量,Paddle 无此参数,暂无转写方式。 | +| scale_grad_by_freq | - | 是否根据 batch
内单词的频数的倒数缩放梯度,Paddle 无此参数,暂无转写方式。| +| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| - | mp_group | 模型并行组,PyTorch 无此参数,Paddle 保持默认即可。 | +| - | name | 网络层名称,PyTorch 无此参数,Paddle 保持默认即可。 | diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md index 46cf6471bb3..fb824fd2c32 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.RowParallelLinear.md @@ -17,14 +17,14 @@ PyTorch 相比 Paddle 支持更多其他参数,具体如下: | fairscale | PaddlePaddle | 备注 | | ----------------------------| ---------------- | -------- | -| in_features | in_features | 输入特征数 | -| out_features | out_features | 输出特征数 | -| bias | has_bias | 是否增加 bias | -| input_is_parallel | input_is_parallel| 输入是否在 GPUs 上进行过分割,如果是就不再分割 | -| init_method |- | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -|- | weight_attr | 网络层参数属性| -| stride |- | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| keep_master_weight_for_test |- | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -|- | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | -|- | mp_group | 模型并行组| -|- | name | 网络层名称| +| in_features | in_features | 输入特征数。 | +| out_features | out_features | 输出特征数。 | +| bias | has_bias | 是否增加 bias。 | +| input_is_parallel | input_is_parallel| 输入是否在 GPUs 上进行过分割,如果是就不再分割。 | +| init_method |- | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +|- | weight_attr | 网络层参数属性,PyTorch 无此参数,Paddle 保持默认即可。| +| stride |- | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| keep_master_weight_for_test 
|- | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +|- | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作,PyTorch 无此参数,Paddle 保持默认即可。 | +|- | mp_group | 模型并行组,PyTorch 无此参数,Paddle 保持默认即可。| +|- | name | 网络层名称,PyTorch 无此参数,Paddle 保持默认即可。| From 35ed973016586a96a5b87275505cf24da247282b Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 07:42:38 +0000 Subject: [PATCH 09/12] fix_typo --- .../others/torch.set_default_tensor_type.md | 2 +- ...el_parallel.layers.ColumnParallelLinear.md | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index 216edc8a8cc..b7972210742 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -12,7 +12,7 @@ torch.set_default_tensor_type(d) paddle.set_default_dtype(d) ``` -两者功能一致但但输入参数类型不一致,torch 支持浮点张量类型或其名称,paddle 仅支持 dtype,需要转写,具体如下: +两者功能一致但输入参数类型不一致,torch 支持浮点张量类型或其名称,paddle 仅支持 dtype,需要转写,具体如下: ### 参数映射 diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md index 244d0b01626..e31b57d75d3 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference_third_party/fairscale/fairscale.nn.model_parallel.layers.ColumnParallelLinear.md @@ -17,14 +17,14 @@ PyTorch 相比 Paddle 支持更多其他参数,具体如下: | fairscale | PaddlePaddle | 备注 | | 
----------------------------| ---------------- | -------- | -| in_features | in_features | 输入特征数 | -| out_features | out_features | 输出特征数 | -| bias | has_bias | 是否增加 bias | -| gather_output | gather_output | 是否对每个 rank 的输出 allgather | -| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| - | weight_attr | 网络层参数属性 | -| stride | - | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除 | -| - | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作 | -| - | mp_group | 模型并行组 | -| - | name | 网络层名称 | +| in_features | in_features | 输入特征数。 | +| out_features | out_features | 输出特征数。 | +| bias | has_bias | 是否增加 bias。 | +| gather_output | gather_output | 是否对每个 rank 的输出 allgather。 | +| init_method | - | 参数初始化方法,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| - | weight_attr | 网络层参数属性,PyTorch 无此参数,Paddle 保持默认即可。 | +| stride | - | 线性层切分后参数块的 stride, 用于特殊的存储格式,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| keep_master_weight_for_test | - | 返回主参数用于测试,Paddle 无此参数,一般对网络训练结果影响不大,可直接删除。 | +| - | fuse_matmul_bias | 是否融合矩阵乘和加 bias 操作,PyTorch 无此参数,Paddle 保持默认即可。 | +| - | mp_group | 模型并行组,PyTorch 无此参数,Paddle 保持默认即可。 | +| - | name | 网络层名称,PyTorch 无此参数,Paddle 保持默认即可。 | From 375c9e834518e6d35bc683755f8be7e649256140 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 07:49:01 +0000 Subject: [PATCH 10/12] remove --- .../others/torch.set_default_tensor_type.md | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md deleted file mode 100644 index b7972210742..00000000000 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md +++ 
/dev/null @@ -1,41 +0,0 @@ -## [ 输入参数类型不一致 ] torch.set_default_tensor_type - -### [torch.set_default_tensor_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html#torch-set-default-tensor-type) - -```python -torch.set_default_tensor_type(d) -``` - -### [paddle.set_default_dtype](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/set_default_dtype_cn.html) - -```python -paddle.set_default_dtype(d) -``` - -两者功能一致但输入参数类型不一致,torch 支持浮点张量类型或其名称,paddle 仅支持 dtype,需要转写,具体如下: - -### 参数映射 - -| PyTorch | PaddlePaddle | 备注 | -| ----------- | ------------ | ------------------- | -| d | d | 浮点张量类型或其名称. | - -### 转写示例 -#### d: 浮点张量类型或其名称 -```python -# pytorch 写法 -torch.set_default_tensor_type(torch.HalfTensor) -torch.set_default_tensor_type('torch.HalfTensor') -torch.set_default_tensor_type(torch.FloatTensor) -torch.set_default_tensor_type('torch.FloatTensor') -torch.set_default_tensor_type(torch.DoubleTensor) -torch.set_default_tensor_type('torch.DoubleTensor') - -# paddle 写法 -paddle.set_default_dtype('float16') -paddle.set_default_dtype('float16') -paddle.set_default_dtype('float32') -paddle.set_default_dtype('float32') -paddle.set_default_dtype('float64') -paddle.set_default_dtype('float64') -``` From 2ee555902763b3811d4a8c84c3d34430c8ca37f7 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 07:59:42 +0000 Subject: [PATCH 11/12] fix_typo --- .../{ops => others}/torch.set_default_tensor_type.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename docs/guides/model_convert/convert_from_pytorch/api_difference/{ops => others}/torch.set_default_tensor_type.md (60%) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/ops/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md similarity index 60% rename from docs/guides/model_convert/convert_from_pytorch/api_difference/ops/torch.set_default_tensor_type.md rename 
to docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md index acf827505a3..a8d750aaecf 100644 --- a/docs/guides/model_convert/convert_from_pytorch/api_difference/ops/torch.set_default_tensor_type.md +++ b/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md @@ -1,4 +1,4 @@ -## [ 输入参数用法不一致 ]torch.set_default_tensor_type +## [ 输入参数类型不一致 ]torch.set_default_tensor_type ### [torch.set\_default\_tensor\_type](https://pytorch.org/docs/stable/generated/torch.set_default_tensor_type.html) @@ -12,17 +12,17 @@ torch.set_default_tensor_type(t) paddle.set_default_dtype(d) ``` -其中 PyTorch 与 Paddle 的参数类型不一致,具体如下: +两者功能一致但输入参数类型不一致,具体如下: ### 参数映射 | PyTorch | PaddlePaddle | 备注 | | ------- | ------------ | -- | -| t | d | 指定的默认张量类型,参数类型不一致。PyTorch 支持张量类型或其名称字符串(如 `torch.FloatTensor`,Paddle 支持直接指定 `dtype`(如 `paddle.float32`),需要转写。 | +| t | d | 浮点张量类型或其名称。PyTorch 支持 `torch.FloatTensor` 或 "torch.FloatTensor",Paddle 只支持 `dtype`(如 `paddle.float32`),需要转写。 | ### 转写示例 -#### t 张量类型 +#### t 浮点张量类型或其名称 ```python # PyTorch From 0ed00f905c6efd9f11794806b093a88f2b4d4de8 Mon Sep 17 00:00:00 2001 From: xuxinyi04 Date: Thu, 18 Jul 2024 09:07:34 +0000 Subject: [PATCH 12/12] move --- .../{others => ops}/torch.set_default_tensor_type.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/guides/model_convert/convert_from_pytorch/api_difference/{others => ops}/torch.set_default_tensor_type.md (100%) diff --git a/docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md b/docs/guides/model_convert/convert_from_pytorch/api_difference/ops/torch.set_default_tensor_type.md similarity index 100% rename from docs/guides/model_convert/convert_from_pytorch/api_difference/others/torch.set_default_tensor_type.md rename to docs/guides/model_convert/convert_from_pytorch/api_difference/ops/torch.set_default_tensor_type.md