onnxruntime
onnxruntime copied to clipboard
[Build] --external_graph_transformer_path doesn't work. Was --test_external_transformer_example removed from build.py?
Describe the issue
@snnn FYI
https://github.com/microsoft/onnxruntime/pull/9478/ was introduced to allow External Graph Transformers to be compiled along with ORT through the flag --external_graph_transformer_path PATH. This PR also introduced the flag --test_external_transformer_example to test it on CI.
However, https://github.com/microsoft/onnxruntime/pull/15416/ (accidentally?!) removed the --test_external_transformer_example flag, so CI doesn't test --external_graph_transformer_path anymore, and that option is now broken (I didn't track down the exact commit that introduced the regression, though).
The repro for --external_graph_transformer_path is taken from the original test in PR #9478:
# run_repro.py
import sys
import threading
import time
class OutputGrabber(object):
    """Capture text written to a stream by redirecting its file descriptor.

    The stream's underlying descriptor is temporarily replaced with the write
    end of a pipe, so everything written to that descriptor while capturing
    is read back from the pipe and stored in ``capturedtext``.

    Use it as a context manager (``with OutputGrabber() as out: ...``) or
    call ``start()`` / ``stop()`` explicitly. The pipe is created once in
    ``__init__`` and closed by ``stop()``, so an instance supports a single
    start/stop cycle.

    Args:
        stream: Text stream to capture; defaults to ``sys.stdout``. It must
            provide ``fileno()``, ``write()``, ``flush()`` and ``encoding``.
        threaded: When true, a worker thread drains the pipe while capturing;
            otherwise the pipe is only drained inside ``stop()``.
    """

    # Sentinel written to the stream by stop(); readOutput() stops reading
    # as soon as it sees this character (backspace).
    escape_char = "\b"

    def __init__(self, stream=None, threaded=False):
        self.origstream = stream
        self.threaded = threaded
        if self.origstream is None:
            self.origstream = sys.stdout
        self.origstreamfd = self.origstream.fileno()
        self.capturedtext = ""
        # Create a pipe so the stream can be captured:
        self.pipe_out, self.pipe_in = os.pipe()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.stop()

    def start(self):
        """Start capturing the stream data."""
        self.capturedtext = ""
        # Push out anything already buffered so that text written before
        # start() goes to the real destination instead of being captured.
        self.origstream.flush()
        # Save a copy of the stream:
        self.streamfd = os.dup(self.origstreamfd)
        # Replace the original stream with our write pipe:
        os.dup2(self.pipe_in, self.origstreamfd)
        if self.threaded:
            # Start thread that will read the stream:
            self.workerThread = threading.Thread(target=self.readOutput)
            self.workerThread.start()
            # Make sure that the thread is running and os.read() has executed:
            time.sleep(0.01)

    def stop(self):
        """Stop capturing the stream data and save the text in `capturedtext`."""
        try:
            # Print the escape character to make the readOutput method stop:
            self.origstream.write(self.escape_char)
            # Flush the stream to make sure all our data goes in before
            # the escape character:
            self.origstream.flush()
            if self.threaded:
                # wait until the thread finishes so we are sure that
                # we have until the last character:
                self.workerThread.join()
            else:
                self.readOutput()
        finally:
            # Always undo the redirection, even if draining the pipe failed,
            # so the stream is never left pointing at a closed pipe.
            # Close the pipe:
            os.close(self.pipe_in)
            os.close(self.pipe_out)
            # Restore the original stream:
            os.dup2(self.streamfd, self.origstreamfd)
            # Close the duplicate stream:
            os.close(self.streamfd)

    def readOutput(self):
        """Read the pipe one byte at a time and append the text to `capturedtext`.

        Reading stops at end-of-file or when the escape character is seen.
        """
        # Local import: the surrounding script does not import codecs.
        import codecs

        # Bytes are fed through an incremental decoder so that characters
        # encoded as several bytes are reassembled instead of failing to
        # decode on their first byte.
        encoding = getattr(self.origstream, "encoding", None) or "utf-8"
        decoder = codecs.getincrementaldecoder(encoding)()
        while True:
            raw = os.read(self.pipe_out, 1)
            if not raw:
                break
            char = decoder.decode(raw)
            if self.escape_char in char:
                break
            # char is "" while a multi-byte character is still incomplete.
            self.capturedtext += char
import torch
from onnxruntime.capi import _pybind_state as torch_ort_eager
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
from onnxruntime.training import optim, orttrainer, orttrainer_options
import unittest
def my_loss(x, target):
    """Return the NLL loss of ``target`` given the raw scores ``x``.

    ``x`` is turned into log-probabilities with a log-softmax over
    dimension 1 before ``F.nll_loss`` is applied.
    """
    log_probs = F.log_softmax(x, dim=1)
    return F.nll_loss(log_probs, target)
class NeuralNet(nn.Module):
    """Linear -> ReLU -> Linear network whose forward pass returns the loss."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are created in the same order as they are applied.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x, target):
        """Run ``x`` through the layers and return ``my_loss`` against ``target``."""
        hidden = self.relu(self.fc1(x))
        scores = self.fc2(hidden)
        return my_loss(scores, target)
class OrtEPTests(unittest.TestCase):
    """Check that the externally registered graph transformer is triggered."""

    def test_external_graph_transformer_triggering(self):
        """Run one training step and look for the transformer's banner in stdout."""
        input_size = 784
        hidden_size = 500
        num_classes = 10
        batch_size = 128
        model = NeuralNet(input_size, hidden_size, num_classes)
        model_desc = {
            "inputs": [("x", [batch_size, input_size]), ("target", [batch_size])],
            "outputs": [("loss", [], True)],
        }
        optim_config = optim.SGDConfig()
        opts = orttrainer.ORTTrainerOptions({"device": {"id": "cpu"}})
        model = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
        # because orttrainer is lazy initialized, feed in a random data to trigger the graph transformer
        data = torch.rand(batch_size, input_size)
        # Labels are drawn from the same number of classes the model was built with.
        target = torch.randint(0, num_classes, (batch_size,))
        with OutputGrabber() as out:
            model.train_step(data, target)
        # Checked after the with-block: in non-threaded mode the captured text
        # is only collected when the grabber stops.
        # assertIn is used instead of a bare assert so the check still runs
        # under `python -O` and reports the captured text on failure.
        self.assertIn(
            "******************Trigger Customized Graph Transformer: MyGraphTransformer!",
            out.capturedtext,
        )
# Run the unittest runner only when this file is executed as a script.
if __name__ == '__main__':
unittest.main()
# /path/to/my_external_graph_transformer.cc
#include "core/optimizer/rewrite_rule.h"
#include "orttraining/core/optimizer/graph_transformer_registry.h"
#include "onnx/defs/schema.h"
#include <memory>
#include <iostream>
namespace onnxruntime {
namespace training {
// Example rewrite rule whose only visible effect is printing a banner to
// stdout each time Apply() runs.
class MyRewriteRule : public RewriteRule {
 public:
  MyRewriteRule() noexcept : RewriteRule("MyRewriteRule") {}

  // Returns an empty list of op types.
  // NOTE(review): presumably an empty list makes the rule a candidate for
  // every op type -- confirm against the RewriteRule base class.
  std::vector<std::string> TargetOpTypes() const noexcept override {
    return std::vector<std::string>{};
  }

 private:
  // Accepts every node unconditionally; none of the arguments are inspected.
  bool SatisfyCondition(const Graph& /*graph*/,
                        const Node& /*node*/,
                        const logging::Logger& /*logger*/) const override {
    return true;
  }

  // Prints the banner (followed by a newline and a flush) and reports
  // success; the graph, node and rule effect are left untouched.
  Status Apply(Graph& /*graph*/,
               Node& /*node*/,
               RewriteRuleEffect& /*rule_effect*/,
               const logging::Logger& /*logger*/) const override {
    std::cout << "******************Trigger Customized Graph Transformer: MyGraphTransformer!";
    std::cout << std::endl;
    return Status::OK();
  }
};
// Registers MyRewriteRule through the ONNX_REGISTER_EXTERNAL_REWRITE_RULE
// macro, passing Level1 and true as the remaining macro arguments.
// NOTE(review): presumably Level1 selects the optimization level and true
// enables the rule by default -- confirm against graph_transformer_registry.h.
void RegisterTrainingExternalTransformers() {
ONNX_REGISTER_EXTERNAL_REWRITE_RULE(MyRewriteRule, Level1, true);
}
}
}
Urgency
No response
Target platform
Linux
Build script
Then build with
./build.sh --config RelWithDebInfo --skip_tests --build_shared_lib --parallel --external_graph_transformer_path /home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/
Error / output
[ 47%] Building CXX object CMakeFiles/onnxruntime_optimizer.dir/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc.o
[ 50%] Built target onnxruntime_mlas_test
/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc:2:10: fatal error: orttraining/core/optimizer/graph_transformer_registry.h: No such file or directory
2 | #include "orttraining/core/optimizer/graph_transformer_registry.h"
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
compilation terminated.
gmake[2]: *** [CMakeFiles/onnxruntime_optimizer.dir/build.make:1350: CMakeFiles/onnxruntime_optimizer.dir/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc.o] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:2039: CMakeFiles/onnxruntime_optimizer.dir/all] Error 2
gmake[1]: *** Waiting for unfinished jobs....
[ 66%] Built target onnxruntime_providers
gmake: *** [Makefile:146: all] Error 2
Traceback (most recent call last):
File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 2950, in <module>
sys.exit(main())
^^^^^^
File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 2842, in main
build_targets(args, cmake_path, build_dir, configs, num_parallel_jobs, args.target)
File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 1731, in build_targets
run_subprocess(cmd_args, env=env)
File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 861, in run_subprocess
return run(*args, cwd=cwd, capture_stdout=capture_stdout, shell=shell, env=my_env)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tfernand/dev/github/onnxruntime/tools/python/util/run.py", line 49, in run
completed_process = subprocess.run(
^^^^^^^^^^^^^^^
File "/home/tfernand/miniconda3/envs/dev1/lib/python3.11/subprocess.py", line 571, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['/usr/bin/cmake', '--build', '/home/tfernand/dev/github/onnxruntime/build/Linux/RelWithDebInfo', '--config', 'RelWithDebInfo', '--', '-j16']' returned non-zero exit status 2.
Visual Studio Version
No response
GCC / Compiler Version
No response
The "--test_external_transformer_example" flag was only used in eager mode, and that PR deleted the eager mode code.