Skip to content

Commit

Permalink
Fix spece spec, etc (#63092)
Browse files (browse the repository at this point in the history)
* Fix

* ci
  • Loading branch information
co63oc authored Apr 2, 2024
1 parent f04e0d2 commit 993e06b
Show file tree
Hide file tree
Showing 23 changed files with 71 additions and 71 deletions.
2 changes: 1 addition & 1 deletion python/cinn/auto_schedule/cost_model/xgb_cost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def load(self, path):
self.booster = xgb.Booster()
self.booster.load_model(path)
# Should we save/load config parameters? Not now because it is pre-set.
# But we should do that here if that's changable in the future.
# But we should do that here if that's changeable in the future.

def update(self, samples, labels):
# xgb doesn't support incremental training, we leave this method as TODO
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def update_dims_mapping(dist_op):
output_spec = get_dist_tensor_spec(dist_op, output_arg_name, False)

# step2: infer spmd
# TODO reivse me
# TODO revise me
op_type = op_desc.type()
rule = get_phi_spmd_rule(op_type)
fw_results = rule.infer_forward(*input_specs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def update_dims_mapping_matmul(dist_op):
trans_x = False
trans_y = False

# TODO (zhangyichen) replace dist tensor spece by dist tensor in future.
# TODO (zhangyichen) replace dist tensor spec by dist tensor in future.
x_spec = get_dist_tensor_spec(dist_op, x_name)
y_spec = get_dist_tensor_spec(dist_op, y_name)
out_spec = get_dist_tensor_spec(dist_op, out_name, False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def update_dims_mapping(dist_op):
keep_dim = op_desc.attr('keep_dim')
dims = op_desc.attr('dim')

# TODO (zhangyichen) replace dist tensor spece by dist tensor in future.
# TODO (zhangyichen) replace dist tensor spec by dist tensor in future.
input_spec = get_dist_tensor_spec(dist_op, input_arg_name)
output_spec = get_dist_tensor_spec(dist_op, output_arg_name, False)
# len(dims) == 0 means reduce_all
Expand Down Expand Up @@ -118,18 +118,18 @@ def is_partial_reduce(axes, dims_mapping):
register_distributed_operator_impl_container(DistributedReduceSum("reduce_sum"))


class DistributedReduceSumPrimtive(DistributedOperatorImplContainer):
class DistributedReduceSumPrimitive(DistributedOperatorImplContainer):
def __init__(self, op_type):
super().__init__(op_type)


register_distributed_operator_impl_container(
DistributedReduceSumPrimtive("reduce_sum_p")
DistributedReduceSumPrimitive("reduce_sum_p")
)


# Batch Dimension ReduceSum Primitive
class DistributedReduceSumPrimtiveImpl0(DistributedOperatorImpl):
class DistributedReduceSumPrimitiveImpl0(DistributedOperatorImpl):
def __init__(self, name):
super().__init__(name)
self._forward_implemented = True
Expand Down Expand Up @@ -237,5 +237,5 @@ def backward(ctx, *args, **kwargs):

register_distributed_operator_impl(
"reduce_sum_p",
DistributedReduceSumPrimtiveImpl0("batch_dimension_reduce_sum_p"),
DistributedReduceSumPrimitiveImpl0("batch_dimension_reduce_sum_p"),
)
10 changes: 5 additions & 5 deletions python/paddle/distributed/fleet/fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def allreduce_perf(
)
if perf_threshold_time > -1 and ret > perf_threshold_time:
logger.warning(
f"[Perf Warnning] AllReduce Test Timeout! {ret} > {perf_threshold_time}"
f"[Perf Warning] AllReduce Test Timeout! {ret} > {perf_threshold_time}"
)

# test reduce perf
Expand All @@ -408,7 +408,7 @@ def reduce_perf(self, iteration, x, group, perf_size, perf_threshold_time):
)
if perf_threshold_time > -1 and ret > perf_threshold_time:
logger.warning(
f"[Perf Warnning] Reduce Test Timeout! {ret} > {perf_threshold_time}"
f"[Perf Warning] Reduce Test Timeout! {ret} > {perf_threshold_time}"
)

# test broadcast perf
Expand All @@ -431,7 +431,7 @@ def broadcast_perf(
)
if perf_threshold_time > -1 and ret > perf_threshold_time:
logger.warning(
f"[Perf Warnning] Broadcast Test Timeout! {ret} > {perf_threshold_time}"
f"[Perf Warning] Broadcast Test Timeout! {ret} > {perf_threshold_time}"
)

# test allgather perf
Expand All @@ -455,7 +455,7 @@ def allgather_perf(
)
if perf_threshold_time > -1 and ret > perf_threshold_time:
logger.warning(
f"[Perf Warnning] Allgather Test Timeout! {ret} > {perf_threshold_time}"
f"[Perf Warning] Allgather Test Timeout! {ret} > {perf_threshold_time}"
)

# test reduce_scatter perf
Expand Down Expand Up @@ -498,7 +498,7 @@ def reduce_scatter_perf(
)
if perf_threshold_time > -1 and ret > perf_threshold_time:
logger.warning(
f"[Perf Warnning] ReduceScatter Test Timeout! {ret} > {perf_threshold_time}"
f"[Perf Warning] ReduceScatter Test Timeout! {ret} > {perf_threshold_time}"
)

def _collective_perf_impl(self, round=50, context={}, hcg=None):
Expand Down
4 changes: 2 additions & 2 deletions python/paddle/distributed/fleet/fleet_executor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def task_node(self):
def set_program(self, program):
assert (
self.lazy_initialize
), "Inside program is unchangable for immediate initialized task node. Set the lazy_initialize to be true if the inside program need to be update. Remember to do all your change before eval node.task_node()."
), "Inside program is unchangeable for immediate initialized task node. Set the lazy_initialize to be true if the inside program need to be update. Remember to do all your change before eval node.task_node()."
self.program = program

def get_program(self):
Expand Down Expand Up @@ -423,7 +423,7 @@ def run1f1b(
):
"""
Split the program to support 1f1b pipeline scheduler.
This funct will split the program based on the op_role.
This function will split the program based on the op_role.
The program will be split into four parts: lr_sched, fwd, bwd, opt.
And will create task nodes based on the four parts of the program.
:param program: The origin program.
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/distributed/fleet/launch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def get_cluster(


def terminate_local_procs(procs):
# try to terminate process by group, this happend in multiprocess senario in user process
# try to terminate process by group, this happened in multiprocess scenario in user process
if os.name != 'nt':
for p in procs:
if p.proc.poll() is None:
Expand Down
32 changes: 16 additions & 16 deletions test/legacy_test/test_merged_momentum_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
def run_momentum_op(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
place,
Expand All @@ -34,7 +34,7 @@ def run_momentum_op(
use_merged=False,
):
assert len(params) == len(grads)
assert len(params) == len(velocitys)
assert len(params) == len(velocities)
if multi_precision:
assert len(params) == len(master_params)
op_type = 'merged_momentum' if use_merged else 'momentum'
Expand All @@ -61,7 +61,7 @@ def run_momentum_op(
helper.create_variable(
persistable=True, shape=v.shape, dtype=v.dtype
)
for v in velocitys
for v in velocities
]
lr_var = helper.create_variable(
persistable=True,
Expand All @@ -83,7 +83,7 @@ def run_momentum_op(
OrderedDict(
[
(v_var.name, v_val)
for v_var, v_val in zip(velocity_vars, velocitys)
for v_var, v_val in zip(velocity_vars, velocities)
]
)
)
Expand Down Expand Up @@ -162,7 +162,7 @@ def run_momentum_op(
def run_momentum_op2(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
place,
Expand All @@ -173,7 +173,7 @@ def run_momentum_op2(
use_nesterov=True,
):
assert len(params) == len(grads)
assert len(params) == len(velocitys)
assert len(params) == len(velocities)
if multi_precision:
assert len(params) == len(master_params)
op_type = 'merged_momentum' if use_merged else 'momentum'
Expand All @@ -195,7 +195,7 @@ def run_momentum_op2(
helper.create_variable(
persistable=True, shape=v.shape, dtype=v.dtype
)
for v in velocitys
for v in velocities
]
lr_var = helper.create_variable(
persistable=True,
Expand All @@ -217,7 +217,7 @@ def run_momentum_op2(
OrderedDict(
[
(v_var.name, v_val)
for v_var, v_val in zip(velocity_vars, velocitys)
for v_var, v_val in zip(velocity_vars, velocities)
]
)
)
Expand Down Expand Up @@ -331,19 +331,19 @@ def prepare_data(self, shapes, multi_precision, seed, place):
)
params = self.gen_rand_data(shapes, dtype)
grads = self.gen_rand_data(shapes, dtype)
velocitys = self.gen_rand_data(shapes, mp_dtype)
velocities = self.gen_rand_data(shapes, mp_dtype)
learning_rate = self.gen_rand_data([[1]], mp_dtype)[0]
if multi_precision:
master_params = [p.astype(mp_dtype) for p in params]
else:
master_params = None
return params, grads, velocitys, master_params, learning_rate
return params, grads, velocities, master_params, learning_rate

def check_with_place(self, place, multi_precision):
(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
) = self.prepare_data(self.shapes, multi_precision, self.seed, place)
Expand All @@ -354,7 +354,7 @@ def run_op(use_merged):
return run_momentum_op(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
place,
Expand Down Expand Up @@ -403,19 +403,19 @@ def prepare_data(self, shapes, multi_precision, seed, place):
)
params = self.gen_rand_data(shapes, dtype)
grads = self.gen_rand_data(shapes, dtype)
velocitys = self.gen_rand_data(shapes, mp_dtype)
velocities = self.gen_rand_data(shapes, mp_dtype)
learning_rate = self.gen_rand_data([[1]], mp_dtype)[0]
if multi_precision:
master_params = [p.astype(mp_dtype) for p in params]
else:
master_params = None
return params, grads, velocitys, master_params, learning_rate
return params, grads, velocities, master_params, learning_rate

def check_with_place(self, place, multi_precision):
(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
) = self.prepare_data(self.shapes, multi_precision, self.seed, place)
Expand All @@ -426,7 +426,7 @@ def run_op(use_nesterov, use_merged):
return run_momentum_op2(
params,
grads,
velocitys,
velocities,
master_params,
learning_rate,
place,
Expand Down
12 changes: 6 additions & 6 deletions test/legacy_test/test_momentum_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def setUp(self):

params = []
grads = []
velocitys = []
velocities = []
learning_rates = []
master_params = []
param_outs = []
Expand Down Expand Up @@ -216,7 +216,7 @@ def setUp(self):

params.append(("SubParam_" + str(i), param))
grads.append(("SubGrad_" + str(i), grad))
velocitys.append(("SubVelocity_" + str(i), velocity))
velocities.append(("SubVelocity_" + str(i), velocity))
learning_rates.append(("SubLearning_rate_" + str(i), learning_rate))
velocity_outs.append(("SubVelocity_out_" + str(i), velocity_out))
param_outs.append(("SubParam_out_" + str(i), param_out))
Expand All @@ -228,7 +228,7 @@ def setUp(self):
self.inputs = {
'Param': params,
'Grad': grads,
'Velocity': velocitys,
'Velocity': velocities,
'LearningRate': learning_rates,
'MasterParam': master_params,
}
Expand Down Expand Up @@ -268,7 +268,7 @@ def setUp(self):

params = []
grads = []
velocitys = []
velocities = []
param_outs = []
velocity_outs = []
learning_rates = []
Expand All @@ -292,15 +292,15 @@ def setUp(self):

params.append(("SubParam_" + str(i), param))
grads.append(("SubGrad_" + str(i), grad))
velocitys.append(("SubVelocity_" + str(i), velocity))
velocities.append(("SubVelocity_" + str(i), velocity))
learning_rates.append(("SubLearning_rate_" + str(i), learning_rate))
velocity_outs.append(("SubVelocity_out_" + str(i), velocity_out))
param_outs.append(("SubParam_out_" + str(i), param_out))

self.inputs = {
'Param': params,
'Grad': grads,
'Velocity': velocitys,
'Velocity': velocities,
'LearningRate': learning_rates,
}

Expand Down
2 changes: 1 addition & 1 deletion test/legacy_test/test_mul_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def test_check_grad_ignore_y(self):
)


# TODO: verify the requirments of CUDA ARCH
# TODO: verify the requirements of CUDA ARCH
@unittest.skipIf(
not core.is_compiled_with_cuda() or get_cuda_version() < 11060,
"MatmulInt8 requires CUDA >= 11.6",
Expand Down
10 changes: 5 additions & 5 deletions test/legacy_test/test_multi_label_soft_margin_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ def call_MultiLabelSoftMarginLoss_layer(
weight=None,
reduction='mean',
):
multilabel_margin_loss = paddle.nn.MultiLabelSoftMarginLoss(
multi_label_margin_loss = paddle.nn.MultiLabelSoftMarginLoss(
weight=weight, reduction=reduction
)
res = multilabel_margin_loss(
res = multi_label_margin_loss(
input=input,
label=label,
)
Expand Down Expand Up @@ -115,7 +115,7 @@ def test_dygraph(
return dy_result


def calc_multilabel_margin_loss(
def calc_multi_label_margin_loss(
input,
label,
weight=None,
Expand Down Expand Up @@ -151,7 +151,7 @@ def test_MultiLabelSoftMarginLoss(self):
reductions = ['sum', 'mean', 'none']
for place in places:
for reduction in reductions:
expected = calc_multilabel_margin_loss(
expected = calc_multi_label_margin_loss(
input=input, label=label, reduction=reduction
)

Expand Down Expand Up @@ -218,7 +218,7 @@ def test_MultiLabelSoftMarginLoss_weights(self):
weight = np.random.randint(0, 2, size=(5, 5)).astype(np.float64)
place = 'cpu'
reduction = 'mean'
expected = calc_multilabel_margin_loss(
expected = calc_multi_label_margin_loss(
input=input, label=label, weight=weight, reduction=reduction
)

Expand Down
2 changes: 1 addition & 1 deletion test/legacy_test/test_multinomial_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def test_fixed_random_number(self):
if not paddle.is_compiled_with_cuda():
return

# Different GPU generatte different random value. Only test V100 here.
# Different GPU generate different random value. Only test V100 here.
if "V100" not in paddle.device.cuda.get_device_name():
return

Expand Down
Loading

0 comments on commit 993e06b

Please sign in to comment.