From 5a7c648d8b535da0b016471d196d2f7b46e9664e Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 10 Jan 2023 10:24:30 -0500
Subject: [PATCH 1/9] add CuGraphSAGEConv model

---
 docs/source/api/python/nn-pytorch.rst         |   1 +
 python/dgl/nn/pytorch/conv/__init__.py        |   2 +
 .../dgl/nn/pytorch/conv/cugraph_sageconv.py   | 136 ++++++++++++++++++
 .../cugraph-ops/test_cugraph_sageconv.py      |  68 +++++++++
 4 files changed, 207 insertions(+)
 create mode 100644 python/dgl/nn/pytorch/conv/cugraph_sageconv.py
 create mode 100644 tests/cugraph/cugraph-ops/test_cugraph_sageconv.py

diff --git a/docs/source/api/python/nn-pytorch.rst b/docs/source/api/python/nn-pytorch.rst
index 7416624066e7..193f2be52dee 100644
--- a/docs/source/api/python/nn-pytorch.rst
+++ b/docs/source/api/python/nn-pytorch.rst
@@ -21,6 +21,7 @@ Conv Layers
     ~dgl.nn.pytorch.conv.EGATConv
     ~dgl.nn.pytorch.conv.EdgeConv
     ~dgl.nn.pytorch.conv.SAGEConv
+    ~dgl.nn.pytorch.conv.CuGraphSAGEConv
     ~dgl.nn.pytorch.conv.SGConv
     ~dgl.nn.pytorch.conv.APPNPConv
     ~dgl.nn.pytorch.conv.GINConv
diff --git a/python/dgl/nn/pytorch/conv/__init__.py b/python/dgl/nn/pytorch/conv/__init__.py
index 938fd5589373..4cccd8bcee9b 100644
--- a/python/dgl/nn/pytorch/conv/__init__.py
+++ b/python/dgl/nn/pytorch/conv/__init__.py
@@ -7,6 +7,7 @@
 from .cfconv import CFConv
 from .chebconv import ChebConv
 from .cugraph_relgraphconv import CuGraphRelGraphConv
+from .cugraph_sageconv import CuGraphSAGEConv
 from .densechebconv import DenseChebConv
 from .densegraphconv import DenseGraphConv
 from .densesageconv import DenseSAGEConv
@@ -67,4 +68,5 @@
     "PNAConv",
     "DGNConv",
     "CuGraphRelGraphConv",
+    "CuGraphSAGEConv",
 ]
diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
new file mode 100644
index 000000000000..e526298f67a5
--- /dev/null
+++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
@@ -0,0 +1,136 @@
+"""Torch Module for GraphSAGE layer using the aggregation primitives in
+cugraph-ops"""
+# pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments
+
+from torch import nn
+
+try:
+    from pylibcugraphops import make_fg_csr, make_mfg_csr
+    from pylibcugraphops.torch.autograd import agg_concat_n2n as SAGEConvAgg
+except ImportError:
+    has_pylibcugraphops = False
+else:
+    has_pylibcugraphops = True
+
+
+class CuGraphSAGEConv(nn.Module):
+    r"""An accelerated GraphSAGE layer from `Inductive Representation Learning
+    on Large Graphs <https://arxiv.org/abs/1706.02216>`__ that leverages the
+    highly-optimized aggregation primitives in cugraph-ops.
+
+    See :class:`dgl.nn.pytorch.conv.SAGEConv` for the mathematical model.
+
+    This module depends on the :code:`pylibcugraphops` package, which can be
+    installed via :code:`conda install -c nvidia pylibcugraphops>=23.02`.
+
+    .. note::
+        This is an **experimental** feature.
+
+    Parameters
+    ----------
+    in_feats : int
+        Input feature size.
+    out_feats : int
+        Output feature size.
+    aggregator_type : str
+        Aggregator type to use (``mean``, ``sum``, ``min``, ``max``).
+    feat_drop : float
+        Dropout rate on features, default: ``0``.
+    bias : bool
+        If True, adds a learnable bias to the output. Default: ``True``.
+    norm : callable activation function/layer or None, optional
+        If not None, applies normalization to the updated node features.
+    max_in_degree : int
+        Maximum number of sampled neighbors of a destination node,
+        i.e. maximum in-degree of destination nodes. If ``None``, it will be
+        calculated on the fly during :meth:`forward`.
+
+    Examples
+    --------
+    >>> import dgl
+    >>> import torch
+    >>> from dgl.nn import CuGraphSAGEConv
+    ...
+    >>> device = 'cuda'
+    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device)
+    >>> g = dgl.add_self_loop(g)
+    >>> feat = torch.ones(6, 10).to(device)
+    >>> conv = CuGraphSAGEConv(10, 2, 'mean').to(device)
+    >>> res = conv(g, feat)
+    >>> res
+    tensor([[-1.1690,  0.1952],
+            [-1.1690,  0.1952],
+            [-1.1690,  0.1952],
+            [-1.1690,  0.1952],
+            [-1.1690,  0.1952],
+            [-1.1690,  0.1952]], device='cuda:0', grad_fn=<AddmmBackward0>)
+    """
+
+    def __init__(
+        self,
+        in_feats,
+        out_feats,
+        aggregator_type="mean",
+        feat_drop=0.0,
+        bias=True,
+        norm=None,
+        max_in_degree=None,
+    ):
+        if has_pylibcugraphops is False:
+            raise ModuleNotFoundError(
+                "dgl.nn.CuGraphSAGEConv requires pylibcugraphops >= 23.02 "
+                "to be installed."
+            )
+        super().__init__()
+        self.in_feats = in_feats
+        self.out_feats = out_feats
+        valid_aggr_types = {"max", "min", "mean", "sum"}
+        if aggregator_type not in valid_aggr_types:
+            raise ValueError(
+                f"Invalid aggregator_type. Must be one of {valid_aggr_types}. "
+                f"But got {aggregator_type} instead."
+            )
+        self.aggr = aggregator_type
+
+        self.norm = norm
+        self.feat_drop = nn.Dropout(feat_drop)
+        self.max_in_degree = max_in_degree
+
+        self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias)
+
+    def forward(self, g, feat):
+        r"""Forward computation.
+
+        Parameters
+        ----------
+        g : DGLGraph
+            The graph.
+        feat : torch.Tensor
+            Node features. Shape: :math:`(|V|, D_{in})`.
+
+        Returns
+        -------
+        torch.Tensor
+            Output node features. Shape: :math:`(|V|, D_{out})`.
+        """
+        offsets, indices, _ = g.adj_sparse("csc")
+
+        if g.is_block:
+            max_in_degree = self.max_in_degree
+            if max_in_degree is None:
+                max_in_degree = g.in_degrees().max().item()
+
+            _graph = make_mfg_csr(
+                g.dstnodes(), g.srcnodes(), offsets, indices, max_in_degree
+            )
+        else:
+            _graph = make_fg_csr(offsets, indices)
+
+        feat = self.feat_drop(feat)
+        h = SAGEConvAgg(feat, _graph, self.aggr)
+        h = self.linear(h)
+
+        if self.norm is not None:
+            h = self.norm(h)
+
+        return h
diff --git a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
new file mode 100644
index 000000000000..f6f948180110
--- /dev/null
+++ b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
@@ -0,0 +1,68 @@
+import pytest
+import torch
+import dgl
+from dgl.nn import CuGraphSAGEConv
+from dgl.nn import SAGEConv
+
+options = {
+    "idtype_int": [False, True],
+    "max_in_degree": [None, 8],
+    "to_block": [False, True],
+}
+
+device = "cuda:0"
+
+
+def generate_graph():
+    u = torch.tensor([0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9])
+    v = torch.tensor([1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0])
+    g = dgl.graph((u, v))
+    return g
+
+
+@pytest.mark.skip()
+@pytest.mark.parametrize("to_block", options["to_block"])
+@pytest.mark.parametrize("max_in_degree", options["max_in_degree"])
+@pytest.mark.parametrize("idtype_int", options["idtype_int"])
+def test_SAGEConv_equality(idtype_int, max_in_degree, to_block):
+    in_feat, out_feat = 5, 2
+    kwargs = {"aggregator_type": "mean"}
+    g = generate_graph().to(device)
+    if idtype_int:
+        g = g.int()
+    if to_block:
+        g = dgl.to_block(g)
+    feat = torch.rand(g.num_src_nodes(), in_feat).to(device)
+
+    torch.manual_seed(0)
+    conv1 = SAGEConv(in_feat, out_feat, **kwargs).to(device)
+
+    torch.manual_seed(0)
+    kwargs["max_in_degree"] = max_in_degree
+    conv2 = CuGraphSAGEConv(in_feat, out_feat, **kwargs).to(device)
+
+    with torch.no_grad():
+        conv2.linear.weight.data[:, :in_feat] = conv1.fc_neigh.weight.data
+        conv2.linear.weight.data[:, in_feat:] = conv1.fc_self.weight.data
+        conv2.linear.bias.data[:] = conv1.fc_self.bias.data
+
+    out1 = conv1(g, feat)
+    out2 = conv2(g, feat)
+    assert torch.allclose(out1, out2, atol=1e-06)
+
+    grad_out = torch.rand_like(out1)
+    out1.backward(grad_out)
+    out2.backward(grad_out)
+    assert torch.allclose(
+        conv1.fc_neigh.weight.grad,
+        conv2.linear.weight.grad[:, :in_feat],
+        atol=1e-6,
+    )
+    assert torch.allclose(
+        conv1.fc_self.weight.grad,
+        conv2.linear.weight.grad[:, in_feat:],
+        atol=1e-6,
+    )
+    assert torch.allclose(
+        conv1.fc_self.bias.grad, conv2.linear.bias.grad, atol=1e-6
+    )

From cd4c4fac5c2b15b60a816baf8d675fae9d5f72e9 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 10 Jan 2023 10:53:06 -0500
Subject: [PATCH 2/9] fix lint issues

---
 tests/cugraph/cugraph-ops/test_cugraph_sageconv.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
index f6f948180110..8933cd50bb8e 100644
--- a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
+++ b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
@@ -1,8 +1,7 @@
+import dgl
 import pytest
 import torch
-import dgl
-from dgl.nn import CuGraphSAGEConv
-from dgl.nn import SAGEConv
+from dgl.nn import CuGraphSAGEConv, SAGEConv
 
 options = {
     "idtype_int": [False, True],

From d4e9688f08590bfad2f109200d42fbadaea7092f Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Thu, 19 Jan 2023 17:07:53 -0500
Subject: [PATCH 3/9] update model to reflect changes in make_mfg_csr(), move
 max_in_degree to forward()

---
 .../dgl/nn/pytorch/conv/cugraph_sageconv.py   | 23 +++++++++----------
 .../cugraph-ops/test_cugraph_sageconv.py      |  3 +--
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
index e526298f67a5..024b1ded26ed 100644
--- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
+++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
@@ -40,10 +40,6 @@ class CuGraphSAGEConv(nn.Module):
         If True, adds a learnable bias to the output. Default: ``True``.
     norm : callable activation function/layer or None, optional
         If not None, applies normalization to the updated node features.
-    max_in_degree : int
-        Maximum number of sampled neighbors of a destination node,
-        i.e. maximum in-degree of destination nodes. If ``None``, it will be
-        calculated on the fly during :meth:`forward`.
 
     Examples
     --------
@@ -74,7 +70,7 @@ def __init__(
         feat_drop=0.0,
         bias=True,
         norm=None,
-        max_in_degree=None,
+
     ):
         if has_pylibcugraphops is False:
             raise ModuleNotFoundError(
                 "dgl.nn.CuGraphSAGEConv requires pylibcugraphops >= 23.02 "
                 "to be installed."
             )
         super().__init__()
         self.in_feats = in_feats
         self.out_feats = out_feats
         valid_aggr_types = {"max", "min", "mean", "sum"}
@@ -88,17 +84,15 @@ def __init__(
         if aggregator_type not in valid_aggr_types:
             raise ValueError(
                 f"Invalid aggregator_type. Must be one of {valid_aggr_types}. "
-                f"But got {aggregator_type} instead."
+                f"But got '{aggregator_type}' instead."
             )
         self.aggr = aggregator_type
-
-        self.norm = norm
         self.feat_drop = nn.Dropout(feat_drop)
-        self.max_in_degree = max_in_degree
+        self.norm = norm
 
         self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias)
 
-    def forward(self, g, feat):
+    def forward(self, g, feat, max_in_degree=None):
         r"""Forward computation.
 
         Parameters
         ----------
         g : DGLGraph
             The graph.
         feat : torch.Tensor
             Node features. Shape: :math:`(|V|, D_{in})`.
+        max_in_degree : int
+            Maximum in-degree of destination nodes. It is only effective when
+            :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When
When + :attr:`g` is generated from a neighbor sampler, the value should be + set to the corresponding :attr:`fanout`. If not given, + :attr:`max_in_degree` will be calculated on-the-fly. Returns ------- @@ -116,12 +116,11 @@ def forward(self, g, feat): offsets, indices, _ = g.adj_sparse("csc") if g.is_block: - max_in_degree = self.max_in_degree if max_in_degree is None: max_in_degree = g.in_degrees().max().item() _graph = make_mfg_csr( - g.dstnodes(), g.srcnodes(), offsets, indices, max_in_degree + g.dstnodes(), offsets, indices, max_in_degree, g.num_src_nodes() ) else: _graph = make_fg_csr(offsets, indices) diff --git a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py index 8933cd50bb8e..e4419a91b8a2 100644 --- a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py +++ b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py @@ -37,7 +37,6 @@ def test_SAGEConv_equality(idtype_int, max_in_degree, to_block): conv1 = SAGEConv(in_feat, out_feat, **kwargs).to(device) torch.manual_seed(0) - kwargs["max_in_degree"] = max_in_degree conv2 = CuGraphSAGEConv(in_feat, out_feat, **kwargs).to(device) with torch.no_grad(): @@ -46,7 +45,7 @@ def test_SAGEConv_equality(idtype_int, max_in_degree, to_block): conv2.linear.bias.data[:] = conv1.fc_self.bias.data out1 = conv1(g, feat) - out2 = conv2(g, feat) + out2 = conv2(g, feat, max_in_degree=max_in_degree) assert torch.allclose(out1, out2, atol=1e-06) grad_out = torch.rand_like(out1) From 310d5b2da69425edefa37889728ec3e59aae205f Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Thu, 19 Jan 2023 17:10:14 -0500 Subject: [PATCH 4/9] lintrunner --- python/dgl/nn/pytorch/conv/cugraph_sageconv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py index 024b1ded26ed..f9a5b46c06d0 100644 --- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py +++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py @@ -70,7 +70,6 @@ def __init__( feat_drop=0.0, bias=True, norm=None, - ): if has_pylibcugraphops is False: raise ModuleNotFoundError( From 59c0d87acf7acc9b29a9354e0fd3ebcc3a181ce7 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Thu, 19 Jan 2023 17:24:19 -0500 Subject: [PATCH 5/9] allow reset_parameters() --- python/dgl/nn/pytorch/conv/cugraph_sageconv.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py index f9a5b46c06d0..2461f97a5a65 100644 --- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py +++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py @@ -91,6 +91,10 @@ def __init__( self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias) + def reset_parameters(self): + r"""Reinitialize learnable parameters.""" + self.linear.reset_parameters() + def forward(self, g, feat, max_in_degree=None): r"""Forward computation. 
From 9e729a3d556f8bd471bb0716b4d068efa97e987f Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Thu, 26 Jan 2023 12:40:49 -0500
Subject: [PATCH 6/9] remove norm option, simplify test

---
 .../dgl/nn/pytorch/conv/cugraph_sageconv.py   |  7 ------
 .../cugraph-ops/test_cugraph_sageconv.py      | 23 +++++++++++--------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
index 2461f97a5a65..3112830ae433 100644
--- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
+++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
@@ -38,8 +38,6 @@ class CuGraphSAGEConv(nn.Module):
         Dropout rate on features, default: ``0``.
     bias : bool
         If True, adds a learnable bias to the output. Default: ``True``.
-    norm : callable activation function/layer or None, optional
-        If not None, applies normalization to the updated node features.
 
     Examples
     --------
@@ -69,7 +67,6 @@ def __init__(
         aggregator_type="mean",
         feat_drop=0.0,
         bias=True,
-        norm=None,
     ):
         if has_pylibcugraphops is False:
             raise ModuleNotFoundError(
@@ -87,7 +84,6 @@ def __init__(
             )
         self.aggr = aggregator_type
         self.feat_drop = nn.Dropout(feat_drop)
-        self.norm = norm
 
         self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias)
@@ -132,7 +128,4 @@ def forward(self, g, feat, max_in_degree=None):
         h = SAGEConvAgg(feat, _graph, self.aggr)
         h = self.linear(h)
 
-        if self.norm is not None:
-            h = self.norm(h)
-
         return h
diff --git a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
index e4419a91b8a2..5ebad58f9825 100644
--- a/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
+++ b/tests/cugraph/cugraph-ops/test_cugraph_sageconv.py
@@ -1,15 +1,19 @@
+# pylint: disable=too-many-arguments, too-many-locals
+from collections import OrderedDict
+from itertools import product
+
 import dgl
 import pytest
 import torch
 from dgl.nn import CuGraphSAGEConv, SAGEConv
 
-options = {
-    "idtype_int": [False, True],
-    "max_in_degree": [None, 8],
-    "to_block": [False, True],
-}
-
-device = "cuda:0"
+options = OrderedDict(
+    {
+        "idtype_int": [False, True],
+        "max_in_degree": [None, 8],
+        "to_block": [False, True],
+    }
+)
@@ -20,10 +24,9 @@ def generate_graph():
 
 
 @pytest.mark.skip()
-@pytest.mark.parametrize("to_block", options["to_block"])
-@pytest.mark.parametrize("max_in_degree", options["max_in_degree"])
-@pytest.mark.parametrize("idtype_int", options["idtype_int"])
+@pytest.mark.parametrize(",".join(options.keys()), product(*options.values()))
 def test_SAGEConv_equality(idtype_int, max_in_degree, to_block):
+    device = "cuda:0"
     in_feat, out_feat = 5, 2
     kwargs = {"aggregator_type": "mean"}
     g = generate_graph().to(device)

From a5283211060a8acf9f8e712d036c9a035ada8405 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Thu, 16 Feb 2023 22:56:47 -0500
Subject: [PATCH 7/9] allow full graph fallback option, add example

---
 examples/advanced/cugraph/graphsage.py        | 200 ++++++++++++++++++
 .../dgl/nn/pytorch/conv/cugraph_sageconv.py   |  38 +++-
 2 files changed, 228 insertions(+), 10 deletions(-)
 create mode 100644 examples/advanced/cugraph/graphsage.py

diff --git a/examples/advanced/cugraph/graphsage.py b/examples/advanced/cugraph/graphsage.py
new file mode 100644
index 000000000000..121c5bc72e24
--- /dev/null
+++ b/examples/advanced/cugraph/graphsage.py
@@ -0,0 +1,200 @@
+import argparse
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchmetrics.functional as MF
+import tqdm
+from dgl.data import AsNodePredDataset
+from dgl.dataloading import (
+    DataLoader,
+    MultiLayerFullNeighborSampler,
+    NeighborSampler,
+)
+from dgl.nn import CuGraphSAGEConv
+from ogb.nodeproppred import DglNodePropPredDataset
+
+
+class SAGE(nn.Module):
+    def __init__(self, in_size, hid_size, out_size):
+        super().__init__()
+        self.layers = nn.ModuleList()
+        # three-layer GraphSAGE-mean
+        self.layers.append(CuGraphSAGEConv(in_size, hid_size, "mean"))
+        self.layers.append(CuGraphSAGEConv(hid_size, hid_size, "mean"))
+        self.layers.append(CuGraphSAGEConv(hid_size, out_size, "mean"))
+        self.dropout = nn.Dropout(0.5)
+        self.hid_size = hid_size
+        self.out_size = out_size
+
+    def forward(self, blocks, x):
+        h = x
+        for l, (layer, block) in enumerate(zip(self.layers, blocks)):
+            h = layer(block, h, max_in_degree=10)
+            if l != len(self.layers) - 1:
+                h = F.relu(h)
+                h = self.dropout(h)
+        return h
+
+    def inference(self, g, device, batch_size):
+        """Conduct layer-wise inference to get all the node embeddings."""
+        feat = g.ndata["feat"]
+        sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"])
+        dataloader = DataLoader(
+            g,
+            torch.arange(g.num_nodes()).to(g.device),
+            sampler,
+            device=device,
+            batch_size=batch_size,
+            shuffle=False,
+            drop_last=False,
+            num_workers=0,
+        )
+        buffer_device = torch.device("cpu")
+        pin_memory = buffer_device != device
+
+        for l, layer in enumerate(self.layers):
+            y = torch.empty(
+                g.num_nodes(),
+                self.hid_size if l != len(self.layers) - 1 else self.out_size,
+                device=buffer_device,
+                pin_memory=pin_memory,
+            )
+            feat = feat.to(device)
+            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
+                x = feat[input_nodes]
+                h = layer(blocks[0], x)  # len(blocks) = 1
+                if l != len(self.layers) - 1:
+                    h = F.relu(h)
+                    h = self.dropout(h)
+                # by design, our output nodes are contiguous
+                y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device)
+            feat = y
+        return y
+
+
+def evaluate(model, graph, dataloader):
+    model.eval()
+    ys = []
+    y_hats = []
+    for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
+        with torch.no_grad():
+            x = blocks[0].srcdata["feat"]
+            ys.append(blocks[-1].dstdata["label"])
+            y_hats.append(model(blocks, x))
+    num_classes = y_hats[0].shape[1]
+    return MF.accuracy(
+        torch.cat(y_hats),
+        torch.cat(ys),
+        task="multiclass",
+        num_classes=num_classes,
+    )
+
+
+def layerwise_infer(device, graph, nid, model, batch_size):
+    model.eval()
+    with torch.no_grad():
+        pred = model.inference(
+            graph, device, batch_size
+        )  # pred in buffer_device
+        pred = pred[nid]
+        label = graph.ndata["label"][nid].to(pred.device)
+        num_classes = pred.shape[1]
+        return MF.accuracy(
+            pred, label, task="multiclass", num_classes=num_classes
+        )
+
+
+def train(args, device, g, dataset, model):
+    # create sampler & dataloader
+    train_idx = dataset.train_idx.to(device)
+    val_idx = dataset.val_idx.to(device)
+    sampler = NeighborSampler(
+        [10, 10, 10],  # fanout for [layer-0, layer-1, layer-2]
+        prefetch_node_feats=["feat"],
+        prefetch_labels=["label"],
+    )
+    use_uva = args.mode == "mixed"
+    train_dataloader = DataLoader(
+        g,
+        train_idx,
+        sampler,
+        device=device,
+        batch_size=1024,
+        shuffle=True,
+        drop_last=False,
+        num_workers=0,
+        use_uva=use_uva,
+    )
+
+    val_dataloader = DataLoader(
+        g,
+        val_idx,
+        sampler,
+        device=device,
+        batch_size=1024,
+        shuffle=True,
+        drop_last=False,
+        num_workers=0,
+        use_uva=use_uva,
+    )
+
+    opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
+
+    for epoch in range(10):
+        model.train()
+        total_loss = 0
+        for it, (input_nodes, output_nodes, blocks) in enumerate(
+            train_dataloader
+        ):
+            x = blocks[0].srcdata["feat"]
+            y = blocks[-1].dstdata["label"]
+            y_hat = model(blocks, x)
+            loss = F.cross_entropy(y_hat, y)
+            opt.zero_grad()
+            loss.backward()
+            opt.step()
+
+            total_loss += loss.item()
+        acc = evaluate(model, g, val_dataloader)
+        print(
+            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
+                epoch, total_loss / (it + 1), acc.item()
+            )
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode",
+        default="mixed",
+        choices=["cpu", "mixed", "puregpu"],
+        help="Training mode. 'cpu' for CPU training, 'mixed' for CPU-GPU mixed training, "
+        "'puregpu' for pure-GPU training.",
+    )
+    args = parser.parse_args()
+    if not torch.cuda.is_available():
+        args.mode = "cpu"
+    print(f"Training in {args.mode} mode.")
+
+    # load and preprocess dataset
+    print("Loading data")
+    dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
+    g = dataset[0]
+    g = g.to("cuda" if args.mode == "puregpu" else "cpu")
+    device = torch.device("cpu" if args.mode == "cpu" else "cuda")
+
+    # create GraphSAGE model
+    in_size = g.ndata["feat"].shape[1]
+    out_size = dataset.num_classes
+    model = SAGE(in_size, 256, out_size).to(device)
+
+    # model training
+    print("Training...")
+    train(args, device, g, dataset, model)
+
+    # test the model
+    print("Testing...")
+    acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096)
+    print("Test Accuracy {:.4f}".format(acc.item()))
diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
index 3112830ae433..4ba1742dc431 100644
--- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
+++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
@@ -2,6 +2,7 @@
 cugraph-ops"""
 # pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments
 
+import torch
 from torch import nn
 
 try:
@@ -59,6 +60,7 @@ class CuGraphSAGEConv(nn.Module):
             [-1.1690,  0.1952],
             [-1.1690,  0.1952]], device='cuda:0', grad_fn=<AddmmBackward0>)
     """
+    MAX_IN_DEGREE_MFG = 500
 
     def __init__(
         self,
@@ -72,21 +74,22 @@ def __init__(
     ):
         if has_pylibcugraphops is False:
             raise ModuleNotFoundError(
-                "dgl.nn.CuGraphSAGEConv requires pylibcugraphops >= 23.02 "
-                "to be installed."
+                f"{self.__class__.__name__} requires pylibcugraphops >= 23.02 "
+                f"to be installed."
             )
-        super().__init__()
-        self.in_feats = in_feats
-        self.out_feats = out_feats
+
         valid_aggr_types = {"max", "min", "mean", "sum"}
         if aggregator_type not in valid_aggr_types:
             raise ValueError(
                 f"Invalid aggregator_type. Must be one of {valid_aggr_types}. "
                 f"But got '{aggregator_type}' instead."
             )
+
+        super().__init__()
+        self.in_feats = in_feats
+        self.out_feats = out_feats
         self.aggr = aggregator_type
         self.feat_drop = nn.Dropout(feat_drop)
-
         self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias)
 
     def reset_parameters(self):
@@ -118,14 +121,29 @@ def forward(self, g, feat, max_in_degree=None):
             if max_in_degree is None:
                 max_in_degree = g.in_degrees().max().item()
 
-            _graph = make_mfg_csr(
-                g.dstnodes(), offsets, indices, max_in_degree, g.num_src_nodes()
-            )
+            if max_in_degree < self.MAX_IN_DEGREE_MFG:
+                _graph = make_mfg_csr(
+                    g.dstnodes(),
+                    offsets,
+                    indices,
+                    max_in_degree,
+                    g.num_src_nodes(),
+                )
+            else:
+                offsets_fg = torch.empty(
+                    g.num_src_nodes() + 1,
+                    dtype=offsets.dtype,
+                    device=offsets.device,
+                )
+                offsets_fg[: offsets.numel()] = offsets
+                offsets_fg[offsets.numel() :] = offsets[-1]
+
+                _graph = make_fg_csr(offsets_fg, indices)
         else:
             _graph = make_fg_csr(offsets, indices)
 
         feat = self.feat_drop(feat)
-        h = SAGEConvAgg(feat, _graph, self.aggr)
+        h = SAGEConvAgg(feat, _graph, self.aggr)[: g.num_dst_nodes()]
         h = self.linear(h)
 
         return h

From 796d73a291d16edf96a0c50a9bf389281cd69264 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Fri, 17 Feb 2023 17:02:34 -0500
Subject: [PATCH 8/9] address comments

---
 docs/source/api/python/nn-pytorch.rst          |  7 +++++--
 python/dgl/nn/pytorch/conv/cugraph_sageconv.py | 18 +++++++++++-------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/docs/source/api/python/nn-pytorch.rst b/docs/source/api/python/nn-pytorch.rst
index 193f2be52dee..92ce0dcd1fd4 100644
--- a/docs/source/api/python/nn-pytorch.rst
+++ b/docs/source/api/python/nn-pytorch.rst
@@ -14,14 +14,12 @@ Conv Layers
     ~dgl.nn.pytorch.conv.GraphConv
     ~dgl.nn.pytorch.conv.EdgeWeightNorm
     ~dgl.nn.pytorch.conv.RelGraphConv
-    ~dgl.nn.pytorch.conv.CuGraphRelGraphConv
     ~dgl.nn.pytorch.conv.TAGConv
     ~dgl.nn.pytorch.conv.GATConv
     ~dgl.nn.pytorch.conv.GATv2Conv
     ~dgl.nn.pytorch.conv.EGATConv
     ~dgl.nn.pytorch.conv.EdgeConv
     ~dgl.nn.pytorch.conv.SAGEConv
-    ~dgl.nn.pytorch.conv.CuGraphSAGEConv
     ~dgl.nn.pytorch.conv.SGConv
     ~dgl.nn.pytorch.conv.APPNPConv
     ~dgl.nn.pytorch.conv.GINConv
@@ -43,6 +41,11 @@ Conv Layers
     ~dgl.nn.pytorch.conv.PNAConv
     ~dgl.nn.pytorch.conv.DGNConv
 
+CuGraph Conv Layers
+----------------------------------------
+    ~dgl.nn.pytorch.conv.CuGraphRelGraphConv
+    ~dgl.nn.pytorch.conv.CuGraphSAGEConv
+
 Dense Conv Layers
 ----------------------------------------
 
diff --git a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
index 4ba1742dc431..b15aca8a9ce6 100644
--- a/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
+++ b/python/dgl/nn/pytorch/conv/cugraph_sageconv.py
@@ -17,9 +17,14 @@ class CuGraphSAGEConv(nn.Module):
     r"""An accelerated GraphSAGE layer from `Inductive Representation Learning
     on Large Graphs <https://arxiv.org/abs/1706.02216>`__ that leverages the
-    highly-optimized aggregation primitives in cugraph-ops.
+    highly-optimized aggregation primitives in cugraph-ops:
 
-    See :class:`dgl.nn.pytorch.conv.SAGEConv` for the mathematical model.
+    .. math::
+        h_{\mathcal{N}(i)}^{(l+1)} &= \mathrm{aggregate}
+        \left(\{h_{j}^{l}, \forall j \in \mathcal{N}(i) \}\right)
+
+        h_{i}^{(l+1)} &= W \cdot \mathrm{concat}
+        (h_{i}^{l}, h_{\mathcal{N}(i)}^{(l+1)})
 
     This module depends on the :code:`pylibcugraphops` package, which can be
     installed via :code:`conda install -c nvidia pylibcugraphops>=23.02`.
@@ -45,7 +50,6 @@ class CuGraphSAGEConv(nn.Module):
     >>> import dgl
     >>> import torch
     >>> from dgl.nn import CuGraphSAGEConv
-    ...
     >>> device = 'cuda'
     >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device)
     >>> g = dgl.add_self_loop(g)
@@ -72,8 +76,8 @@ def __init__(
     ):
         if has_pylibcugraphops is False:
             raise ModuleNotFoundError(
-                f"{self.__class__.__name__} requires pylibcugraphops >= 23.02 "
-                f"to be installed."
+                f"{self.__class__.__name__} requires pylibcugraphops >= 23.02. "
+                f"Install via `conda install -c nvidia 'pylibcugraphops>=23.02'`."
             )
 
         valid_aggr_types = {"max", "min", "mean", "sum"}
@@ -102,7 +106,7 @@ def forward(self, g, feat, max_in_degree=None):
         g : DGLGraph
             The graph.
         feat : torch.Tensor
-            Node features. Shape: :math:`(|V|, D_{in})`.
+            Node features. Shape: :math:`(N, D_{in})`.
         max_in_degree : int
             Maximum in-degree of destination nodes. It is only effective when
             :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When
@@ -113,7 +117,7 @@ def forward(self, g, feat, max_in_degree=None):
 
         Returns
         -------
         torch.Tensor
-            Output node features. Shape: :math:`(|V|, D_{out})`.
+            Output node features. Shape: :math:`(N, D_{out})`.
         """
         offsets, indices, _ = g.adj_sparse("csc")

From 450c5336e9a5b9f847bf0b7f6f52a717da6a63e3 Mon Sep 17 00:00:00 2001
From: Tingyu Wang
Date: Tue, 21 Feb 2023 16:38:28 -0500
Subject: [PATCH 9/9] address reviews

---
 docs/source/api/python/nn-pytorch.rst  | 6 ++++++
 examples/advanced/cugraph/graphsage.py | 6 +++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/source/api/python/nn-pytorch.rst b/docs/source/api/python/nn-pytorch.rst
index 92ce0dcd1fd4..ed9a41bd1aa7 100644
--- a/docs/source/api/python/nn-pytorch.rst
+++ b/docs/source/api/python/nn-pytorch.rst
@@ -43,6 +43,12 @@ Conv Layers
 
 CuGraph Conv Layers
 ----------------------------------------
+
+.. autosummary::
+    :toctree: ../../generated/
+    :nosignatures:
+    :template: classtemplate.rst
+
     ~dgl.nn.pytorch.conv.CuGraphRelGraphConv
     ~dgl.nn.pytorch.conv.CuGraphSAGEConv
 
diff --git a/examples/advanced/cugraph/graphsage.py b/examples/advanced/cugraph/graphsage.py
index 121c5bc72e24..bb9e12af7173 100644
--- a/examples/advanced/cugraph/graphsage.py
+++ b/examples/advanced/cugraph/graphsage.py
@@ -30,7 +30,7 @@ def __init__(self, in_size, hid_size, out_size):
     def forward(self, blocks, x):
         h = x
         for l, (layer, block) in enumerate(zip(self.layers, blocks)):
-            h = layer(block, h, max_in_degree=10)
+            h = layer(block, h)
             if l != len(self.layers) - 1:
                 h = F.relu(h)
                 h = self.dropout(h)
@@ -169,8 +169,8 @@ def train(args, device, g, dataset, model):
     parser.add_argument(
         "--mode",
         default="mixed",
-        choices=["cpu", "mixed", "puregpu"],
-        help="Training mode. 'cpu' for CPU training, 'mixed' for CPU-GPU mixed training, "
+        choices=["mixed", "puregpu"],
+        help="Training mode. 'mixed' for CPU-GPU mixed training, "
         "'puregpu' for pure-GPU training.",
     )
     args = parser.parse_args()
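
A minimal usage sketch of the layer as it lands after patch 9, assuming a CUDA
device and pylibcugraphops >= 23.02. The toy graph, feature sizes, and the
fanout of 2 below are illustrative; the added self-loops give every
destination node an in-degree of exactly 2, so the fanout matches the true
maximum in-degree:

    import dgl
    import torch
    from dgl.nn import CuGraphSAGEConv

    device = "cuda:0"

    # Toy graph; self-loops keep every destination in-degree at exactly 2.
    g = dgl.add_self_loop(dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))).to(device)
    feat = torch.randn(g.num_nodes(), 8, device=device)
    conv = CuGraphSAGEConv(8, 4, aggregator_type="mean").to(device)

    # Full-graph path: forward() builds the cugraph-ops graph from the
    # CSC offsets returned by g.adj_sparse("csc").
    out_full = conv(g, feat)  # shape: (4, 4)

    # Block (MFG) path: for a sampler-produced block, pass the fanout as
    # max_in_degree so the layer skips the on-the-fly in-degree scan.
    block = dgl.to_block(g)
    out_block = conv(block, feat, max_in_degree=2)  # shape: (4, 4)

Moving max_in_degree from the constructor (patch 1) to forward() (patch 3) is
what lets one module instance serve both full-graph and sampled-block
workloads; once the value reaches MAX_IN_DEGREE_MFG (500), the padded
full-graph CSR fallback from patch 7 takes over instead of the MFG path.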