How to Run OpenHGNN?
❓ Questions and Help
Hello, we are trying to use OpenHGNN to benchmark different heterogeneous graph neural networks. However, we cannot seem to run OpenHGNN, as there appears to be an issue with serialization. Below are the code snippets we ran in a Jupyter notebook, installing dependencies with the versions listed on the GitHub page. torch==2.3.0 is not available for python==3.6, so we are using python==3.11 instead.
!pip install torch==2.3.0 torchvision torchaudio torchdata==0.9.0
!pip install "dgl==2.2.1" -f https://data.dgl.ai/wheels/torch-2.3/repo.html
!pip install --upgrade openhgnn==0.7 igraph rdflib lmdb ordered_set
Finally, here is the code that we ran:
from openhgnn import Experiment
from openhgnn import models
def run_experiment(model, dataset, task):
    experiment = Experiment(model=model, dataset=dataset, task=task, use_distributed=False)
    experiment.run()

run_experiment(model=models.SeHGNN, dataset="ogbn-mag", task='node_classification')

# experiment = Experiment(model='RGCN', dataset='acm4GTN', task='node_classification', gpu=-1, lr=0.05, hidden_dim=64,
#                         max_epoch=30, num_layers=3)
# experiment.run()
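One thing we noticed in the log below is that the config reports Model: ABCMeta, which suggests Experiment may expect the registered model name as a string (as in the commented-out RGCN example above) rather than the class object. A variant we could try instead; this is our assumption, and we are guessing that 'SeHGNN' is the registered name:

# Hypothetical variant: pass the model name as a string, mirroring the
# commented RGCN example above ('SeHGNN' as the registry key is a guess).
run_experiment(model='SeHGNN', dataset='ogbn-mag', task='node_classification')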
It appears the DGL sparse matrix is not serializable. Here is the full output and traceback:
DGL backend not selected or invalid. Assuming PyTorch for now.
Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable. Valid options are: pytorch, mxnet, tensorflow (all lowercase)
/usr/local/lib/python3.11/dist-packages/torchdata/datapipes/__init__.py:18: UserWarning:
################################################################################
WARNING!
The 'datapipes', 'dataloader2' modules are deprecated and will be removed in a
future torchdata release! Please see https://github.com/pytorch/data/issues/1196
to learn more and leave feedback.
################################################################################
deprecation_warning()
10 Mar 10:47 INFO [Config Info] Model: ABCMeta, Task: node_classification, Dataset: ogbn-mag
------------------------------------------------------------------------------
Basic setup of this experiment:
model: ABCMeta
dataset: ogbn-mag
task: node_classification.
This experiment has following parameters. You can use set_params to edit them.
Use print(experiment) to print this information again.
------------------------------------------------------------------------------
dataset_name: ogbn-mag
device: cpu
gpu: -1
hpo_search_space: None
hpo_trials: 100
load_from_pretrained: False
max_epoch: 1
model_name: ABCMeta
optimizer: Adam
output_dir: ./openhgnn/output/ABCMeta
patience: 1
seed: 0
use_best_config: False
use_database: False
use_distributed: False
Downloading http://snap.stanford.edu/ogb/data/nodeproppred/mag.zip
Downloaded 0.40 GB: 100%|██████████| 413/413 [00:05<00:00, 76.48it/s]
Extracting dataset/mag.zip
Loading necessary files...
This might take a while.
Processing graphs...
100%|██████████| 1/1 [00:00<00:00, 9320.68it/s]
Converting graphs into DGL objects...
100%|██████████| 1/1 [00:37<00:00, 37.47s/it]
Saving...
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-4-0d11cd29a151> in <cell line: 0>()
6 experiment.run()
7
----> 8 run_experiment(model=models.SeHGNN, dataset="ogbn-mag", task='node_classification')
9 # experiment = Experiment(model='RGCN', dataset='acm4GTN', task='node_classification', gpu=-1, lr=0.05, hidden_dim=64,
10 # max_epoch=30, num_layers=3)
11 frames
<ipython-input-4-0d11cd29a151> in run_experiment(model, dataset, task)
4 def run_experiment(model, dataset, task):
5 experiment = Experiment(model=model, dataset=dataset, task=task, use_distributed=False)
----> 6 experiment.run()
7
8 run_experiment(model=models.SeHGNN, dataset="ogbn-mag", task='node_classification')
/usr/local/lib/python3.11/dist-packages/openhgnn/experiment.py in run(self)
210 hpo_experiment(self.config, trainerflow)
211 else:
--> 212 flow = build_flow(self.config, trainerflow)
213 result = flow.train()
214 if hasattr(self.config, 'line_profiler_func'):
/usr/local/lib/python3.11/dist-packages/openhgnn/trainerflow/__init__.py in build_flow(args, flow_name)
45 if not try_import_flow(flow_name):
46 exit(1)
---> 47 return FLOW_REGISTRY[flow_name](args)
48
49
/usr/local/lib/python3.11/dist-packages/openhgnn/trainerflow/node_classification.py in __init__(self, args)
30 """
31
---> 32 super(NodeClassification, self).__init__(args)
33 self.args.category = self.task.dataset.category
34 self.category = self.args.category
/usr/local/lib/python3.11/dist-packages/openhgnn/trainerflow/base_flow.py in __init__(self, args)
63 self.model = args.model
64 self.device = args.device
---> 65 self.task = build_task(args)
66 self.max_epoch = args.max_epoch
67 self.optimizer = None
/usr/local/lib/python3.11/dist-packages/openhgnn/tasks/__init__.py in build_task(args)
37 if not try_import_task(args.task):
38 exit(1)
---> 39 return TASK_REGISTRY[args.task](args)
40
41
/usr/local/lib/python3.11/dist-packages/openhgnn/tasks/node_classification.py in __init__(self, args)
32 super(NodeClassification, self).__init__()
33 self.logger = args.logger
---> 34 self.dataset = build_dataset(args.dataset, 'node_classification',
35 logger=self.logger,args = args)
36 if args.graphbolt:
/usr/local/lib/python3.11/dist-packages/openhgnn/dataset/__init__.py in build_dataset(dataset, task, *args, **kwargs)
209 _dataset = 'grail_'+ task
210 return DATASET_REGISTRY[_dataset](dataset, logger=kwargs['logger'],args=kwargs['args'])
--> 211 return DATASET_REGISTRY[_dataset](dataset, logger=kwargs['logger'])
212
213
/usr/local/lib/python3.11/dist-packages/openhgnn/dataset/NodeClassificationDataset.py in __init__(self, dataset_name, *args, **kwargs)
778 self.category], split_idx["test"][self.category]
779 self.g, self.label_dict = dataset[0]
--> 780 self.SeHGNN_g = self.mag4sehgnn(dataset)
781 self.g = self.mag4HGT(self.g)
782 self.label = self.label_dict[self.category].squeeze(dim=-1)
/usr/local/lib/python3.11/dist-packages/openhgnn/dataset/NodeClassificationDataset.py in mag4sehgnn(self, dataset)
850 if not os.path.exists(diag_name):
851 PP_rm_diag = row_norm(PP)
--> 852 th.save(PP_rm_diag, diag_name)
853
854 diag_name = f'ogbn-mag_PPP_diag.pt'
/usr/local/lib/python3.11/dist-packages/torch/serialization.py in save(obj, f, pickle_module, pickle_protocol, _use_new_zipfile_serialization, _disable_byteorder_record)
626 if _use_new_zipfile_serialization:
627 with _open_zipfile_writer(f) as opened_zipfile:
--> 628 _save(obj, opened_zipfile, pickle_module, pickle_protocol, _disable_byteorder_record)
629 return
630 else:
/usr/local/lib/python3.11/dist-packages/torch/serialization.py in _save(obj, zip_file, pickle_module, pickle_protocol, _disable_byteorder_record)
838 pickler = pickle_module.Pickler(data_buf, protocol=pickle_protocol)
839 pickler.persistent_id = persistent_id
--> 840 pickler.dump(obj)
841 data_value = data_buf.getvalue()
842 zip_file.write_record('data.pkl', data_value, len(data_value))
RuntimeError: Tried to serialize object __torch__.torch.classes.dgl_sparse.SparseMatrix which does not have a __getstate__ method defined!
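For reference, here is a minimal sketch of what we believe is failing, plus a possible workaround. This is our assumption, not an official API: dgl.sparse.SparseMatrix appears to be a TorchScript class without __getstate__, so torch.save on it fails, but saving its COO components and rebuilding the matrix on load should avoid the pickler entirely.

import torch
import dgl.sparse as dglsp

# Build a tiny SparseMatrix; values default to ones.
indices = torch.tensor([[0, 1, 2], [1, 2, 0]])  # 2 x nnz COO indices
A = dglsp.spmatrix(indices, shape=(3, 3))

# We expect this to reproduce the error raised from
# NodeClassificationDataset.mag4sehgnn in the traceback above.
try:
    torch.save(A, 'A.pt')
except RuntimeError as e:
    print(e)  # "... does not have a __getstate__ method defined!"

# Possible workaround (our assumption): persist the COO components and
# reconstruct the SparseMatrix after loading.
row, col = A.coo()
torch.save({'indices': torch.stack([row, col]), 'val': A.val, 'shape': A.shape}, 'A_coo.pt')
state = torch.load('A_coo.pt')
A2 = dglsp.spmatrix(state['indices'], state['val'], shape=state['shape'])

Is something like this needed in the OpenHGNN dataset code, or are we missing a supported version combination that avoids the issue?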