onnxruntime icon indicating copy to clipboard operation
onnxruntime copied to clipboard

[Build] --external_graph_transformer_path doesn't work. Was --test_external_transformer_example removed from build.py?

Open thiagocrepaldi opened this issue 9 months ago • 3 comments

Describe the issue

@snnn FYI

https://github.com/microsoft/onnxruntime/pull/9478/ was introduced to allow External Graph Transformers to be compiled along with ORT through the flag --external_graph_transformer_path PATH. This PR also introduced the flag --test_external_transformer_example to test it on CI.

However, https://github.com/microsoft/onnxruntime/pull/15416/ (accidentally?) removed the --test_external_transformer_example flag, so CI no longer exercises --external_graph_transformer_path, which is now broken (I haven't tracked down the exact commit that introduced the regression).

The repro for --external_graph_transformer_path is from the original test on the PR #9478:


# run_repro.py
import sys
import threading
import time

class OutputGrabber(object):
    """
    Class used to grab standard output or another stream.
    """
    escape_char = "\b"

    def __init__(self, stream=None, threaded=False):
        self.origstream = stream
        self.threaded = threaded
        if self.origstream is None:
            self.origstream = sys.stdout
        self.origstreamfd = self.origstream.fileno()
        self.capturedtext = ""
        # Create a pipe so the stream can be captured:
        self.pipe_out, self.pipe_in = os.pipe()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, type, value, traceback):
        self.stop()

    def start(self):
        """
        Start capturing the stream data.
        """
        self.capturedtext = ""
        # Save a copy of the stream:
        self.streamfd = os.dup(self.origstreamfd)
        # Replace the original stream with our write pipe:
        os.dup2(self.pipe_in, self.origstreamfd)
        if self.threaded:
            # Start thread that will read the stream:
            self.workerThread = threading.Thread(target=self.readOutput)
            self.workerThread.start()
            # Make sure that the thread is running and os.read() has executed:
            time.sleep(0.01)

    def stop(self):
        """
        Stop capturing the stream data and save the text in `capturedtext`.
        """
        # Print the escape character to make the readOutput method stop:
        self.origstream.write(self.escape_char)
        # Flush the stream to make sure all our data goes in before
        # the escape character:
        self.origstream.flush()
        if self.threaded:
            # wait until the thread finishes so we are sure that
            # we have until the last character:
            self.workerThread.join()
        else:
            self.readOutput()
        # Close the pipe:
        os.close(self.pipe_in)
        os.close(self.pipe_out)
        # Restore the original stream:
        os.dup2(self.streamfd, self.origstreamfd)
        # Close the duplicate stream:
        os.close(self.streamfd)

    def readOutput(self):
        """
        Read the stream data (one byte at a time)
        and save the text in `capturedtext`.
        """
        while True:
            char = os.read(self.pipe_out,1).decode(self.origstream.encoding)
            if not char or self.escape_char in char:
                break
            self.capturedtext += char

import torch
from onnxruntime.capi import _pybind_state as torch_ort_eager
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
from onnxruntime.training import optim, orttrainer, orttrainer_options
import unittest

def my_loss(x, target):
    """Negative log-likelihood over log-softmax logits (i.e. cross-entropy)."""
    log_probs = F.log_softmax(x, dim=1)
    return F.nll_loss(log_probs, target)

class NeuralNet(nn.Module):
    """Two-layer fully connected classifier that returns the training loss.

    ``forward`` runs fc1 -> ReLU -> fc2 and applies ``my_loss`` (NLL over
    log-softmax) against ``target``, so its output is a scalar loss rather
    than raw logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x, target):
        hidden = self.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return my_loss(logits, target)

class OrtEPTests(unittest.TestCase):
    """Repro: an externally compiled graph transformer must actually fire."""

    def test_external_graph_transformer_triggering(self):
        """Run one ORTTrainer training step and assert that the custom graph
        transformer announced itself on stdout (captured at the fd level,
        since the message is printed from C++)."""
        input_size = 784
        hidden_size = 500
        num_classes = 10
        batch_size = 128
        model = NeuralNet(input_size, hidden_size, num_classes)

        model_desc = {
            'inputs': [('x', [batch_size, input_size]),
                       ('target', [batch_size])],
            'outputs': [('loss', [], True)],
        }
        optim_config = optim.SGDConfig()
        opts = orttrainer.ORTTrainerOptions({'device': {'id': 'cpu'}})
        model = orttrainer.ORTTrainer(model, model_desc, optim_config, options=opts)
        # because orttrainer is lazy initialized, feed in a random data to
        # trigger graph construction (and therefore the graph transformer)
        data = torch.rand(batch_size, input_size)
        target = torch.randint(0, num_classes, (batch_size,))

        with OutputGrabber() as out:
            model.train_step(data, target)
        # assertIn (not a bare assert) survives python -O and reports the
        # captured text on failure.
        self.assertIn(
            '******************Trigger Customized Graph Transformer:  MyGraphTransformer!',
            out.capturedtext,
        )

# Allow running this repro directly: discovers and runs the TestCase above.
if __name__ == '__main__':
    unittest.main()
# /path/to/my_external_graph_transformer.cpp
#include "core/optimizer/rewrite_rule.h"
#include "orttraining/core/optimizer/graph_transformer_registry.h"
#include "onnx/defs/schema.h"
#include <memory>
#include <iostream>

namespace onnxruntime {
namespace training {

// Minimal rewrite rule whose only purpose is to prove that an externally
// supplied graph transformer was compiled in and invoked: Apply() prints a
// marker line that the Python repro test greps for.  It never modifies the
// graph.
class MyRewriteRule : public RewriteRule {
public:
  MyRewriteRule() noexcept
      : RewriteRule("MyRewriteRule") {
  }
  // Empty vector — presumably means "consider every op type" per the
  // RewriteRule contract; TODO confirm against core/optimizer/rewrite_rule.h.
  std::vector<std::string> TargetOpTypes() const noexcept override {
    return {};
  }

private:
  // Always true so Apply() runs on the first node visited.
  bool SatisfyCondition(const Graph& /*graph*/, const Node& /*node*/, const logging::Logger& /*logger*/) const override {
    return true;
  }

  // Emit the exact marker string asserted on by the Python test; the graph
  // and rule_effect are deliberately left untouched.
  Status Apply(Graph& /*graph*/, Node& /*node*/, RewriteRuleEffect& /*rule_effect*/, const logging::Logger& /*logger*/) const override{
    std::cout << "******************Trigger Customized Graph Transformer:  MyGraphTransformer!" << std::endl;
    return Status::OK();
  }
};

// Hook called by the ORT build when --external_graph_transformer_path is
// used; registers the rule via the macro from graph_transformer_registry.h.
// NOTE(review): the Level1/true arguments look like optimization level and
// an enable flag — confirm against the macro definition.
void RegisterTrainingExternalTransformers() {
  ONNX_REGISTER_EXTERNAL_REWRITE_RULE(MyRewriteRule, Level1, true);
}

}
}

Urgency

No response

Target platform

Linux

Build script

Then build with

./build.sh --config RelWithDebInfo --skip_tests --build_shared_lib --parallel --external_graph_transformer_path /home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/

Error / output

[ 47%] Building CXX object CMakeFiles/onnxruntime_optimizer.dir/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc.o
[ 50%] Built target onnxruntime_mlas_test
/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc:2:10: fatal error: orttraining/core/optimizer/graph_transformer_registry.h: No such file or directory
    2 | #include "orttraining/core/optimizer/graph_transformer_registry.h"
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
compilation terminated.
gmake[2]: *** [CMakeFiles/onnxruntime_optimizer.dir/build.make:1350: CMakeFiles/onnxruntime_optimizer.dir/home/tfernand/dev/workspace/onnxruntime_external_graph_transformer/external_graph_transformers/my_external_graph_transformer.cc.o] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:2039: CMakeFiles/onnxruntime_optimizer.dir/all] Error 2
gmake[1]: *** Waiting for unfinished jobs....
[ 66%] Built target onnxruntime_providers
gmake: *** [Makefile:146: all] Error 2
Traceback (most recent call last):
  File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 2950, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 2842, in main
    build_targets(args, cmake_path, build_dir, configs, num_parallel_jobs, args.target)
  File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 1731, in build_targets
    run_subprocess(cmd_args, env=env)
  File "/home/tfernand/dev/github/onnxruntime/tools/ci_build/build.py", line 861, in run_subprocess
    return run(*args, cwd=cwd, capture_stdout=capture_stdout, shell=shell, env=my_env)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tfernand/dev/github/onnxruntime/tools/python/util/run.py", line 49, in run
    completed_process = subprocess.run(
                        ^^^^^^^^^^^^^^^
  File "/home/tfernand/miniconda3/envs/dev1/lib/python3.11/subprocess.py", line 571, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['/usr/bin/cmake', '--build', '/home/tfernand/dev/github/onnxruntime/build/Linux/RelWithDebInfo', '--config', 'RelWithDebInfo', '--', '-j16']' returned non-zero exit status 2.

Visual Studio Version

No response

GCC / Compiler Version

No response

thiagocrepaldi avatar May 21 '24 17:05 thiagocrepaldi

The "--test_external_transformer_example" flag was only used in eager mode, and that PR deleted the eager-mode code.

snnn avatar May 21 '24 18:05 snnn