comtypes icon indicating copy to clipboard operation
comtypes copied to clipboard

Getting "NULL COM pointer access" when script calls from windows service

Open parsasaei opened this issue 1 year ago • 7 comments

I'm trying to build an RPC service that calls my script, which uses the comtypes package to create a PDF file from a docx file. When I run the script from the command prompt I have no problems, but when it is called from a Windows service I get the error shown below. This is my script:

  import sys
  import json
  import subprocess
  from pathlib import Path
  import os
  # from ctypes import windll
  
  # SYS Import Local Packages Installed
  path = os.path.realpath(os.path.abspath(__file__))
  sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
  sys.path.append(os.path.join(os.path.dirname(__file__),"."))
  sys.path.append(os.path.join(os.path.dirname(__file__),"packages"))
  # from packages import comtypes
  from packages.comtypes import client
  # COINIT_MULTITHREADED = 0x0
  
  try:
      # 3.8+
      from importlib.metadata import version
  except ImportError:
      from importlib_metadata import version
  
  def _save_as_pdf(word, docx_filepath, pdf_filepath, pdf_format=17):
      """Open one document in *word*, save it as PDF, and always close it.

      ``pdf_format`` defaults to 17, the value the script passes as
      ``FileFormat`` for PDF output (``wdFormatPDF``).
      """
      doc = word.Documents.Open(str(docx_filepath))
      try:
          doc.SaveAs(str(pdf_filepath), FileFormat=pdf_format)
      finally:
          # Close even when SaveAs fails so the document is not left open
          # inside Word. The argument 0 is the same value the script has
          # always passed to Close().
          doc.Close(0)

  def windows(paths, keep_active):
      """Convert Word documents to PDF by automating Word through COM.

      Args:
          paths: Mapping with the keys ``"batch"``, ``"input"`` and
              ``"output"``. When ``paths["batch"]`` is true, ``"input"`` and
              ``"output"`` are folders; otherwise they are the source
              document and the target PDF file.
          keep_active: When true, Word is left running after the conversion;
              otherwise ``word.Quit()`` is called, even if a conversion fails.
      """
      word = client.CreateObject("Word.Application")
      try:
          if paths["batch"]:
              input_dir = Path(paths["input"])
              # Path.glob() returns a generator, which is always truthy, so
              # "glob('*.docx') or glob('*.doc')" could never reach the *.doc
              # fallback. Materialise each result so an empty list falls through.
              docx_files = sorted(input_dir.glob("*.docx")) or sorted(input_dir.glob("*.doc"))
              for i, docx_filepath in enumerate(docx_files):
                  pdf_filepath = Path(paths["output"]) / (str(docx_filepath.stem) + ".pdf")
                  print(f"Converting {docx_filepath} to {pdf_filepath} ({i+1}/{len(docx_files)})")
                  _save_as_pdf(word, docx_filepath, pdf_filepath)
          else:
              docx_filepath = Path(paths["input"]).resolve()
              pdf_filepath = Path(paths["output"]).resolve()
              print(f"Converting {docx_filepath} to {pdf_filepath}")
              _save_as_pdf(word, docx_filepath, pdf_filepath)
      finally:
          # Without this, a failed conversion would leave a Word process
          # running in the background.
          if not keep_active:
              word.Quit()
  
  def resolve_paths(input_path, output_path):
      """Turn the raw input/output arguments into a conversion plan.

      Args:
          input_path: A ``.docx``/``.doc`` file, or a folder (batch mode).
          output_path: Target PDF file or folder. A falsy value means "write
              next to the input".

      Returns:
          dict with the keys ``"batch"`` (bool), ``"input"`` (str) and
          ``"output"`` (str). Paths are absolute.

      Raises:
          AssertionError: If a batch output is not an existing folder, the
              input file is not ``.docx``/``.doc``, or an explicit output file
              does not end in ``.pdf``.
      """
      input_path = Path(input_path).resolve()
      output_path = Path(output_path).resolve() if output_path else None

      # The checks below raise AssertionError explicitly instead of using
      # ``assert``: the exception type stays the same for callers, but the
      # validation is no longer stripped when Python runs with -O.
      if input_path.is_dir():
          if output_path is None:
              output_path = input_path
          elif not output_path.is_dir():
              raise AssertionError(f"batch output must be an existing folder: {output_path}")
          return {"batch": True, "input": str(input_path), "output": str(output_path)}

      # Compare extensions case-insensitively so "REPORT.DOCX" is accepted.
      if not str(input_path).lower().endswith((".docx", ".doc")):
          raise AssertionError(f"input must be a .docx or .doc file: {input_path}")

      pdf_name = str(input_path.stem) + ".pdf"
      if output_path is None:
          output_path = input_path.parent / pdf_name
      elif output_path.is_dir():
          output_path = output_path / pdf_name
      elif not str(output_path).lower().endswith(".pdf"):
          raise AssertionError(f"output file must end in .pdf: {output_path}")
      # Always hand back strings; previously some branches leaked Path objects.
      return {"batch": False, "input": str(input_path), "output": str(output_path)}
  
  def convert(input_path, output_path=None, keep_active=False):
      """Resolve the given paths, then convert the document(s) to PDF.

      ``input_path`` and ``output_path`` are passed to ``resolve_paths``;
      the resulting plan and ``keep_active`` are passed on to ``windows``.
      """
      resolved = resolve_paths(input_path, output_path)
      windows(resolved, keep_active)
  
  def cli():
      """Parse the command line and run the conversion.

      Prints the help text and exits with status 0 when the script is started
      without arguments; otherwise the parsed ``input``, ``output`` and
      ``--keep-active`` values are forwarded to ``convert``.
      """
      import textwrap
      import argparse

      # The explicit-output example must name a .pdf target: resolve_paths
      # rejects an output file that does not end in ".pdf".
      description = textwrap.dedent(
          """
      Example Usage:

      Convert single docx file in-place from myfile.docx to myfile.pdf:
          docx2pdf myfile.docx

      Batch convert docx folder in-place. Output PDFs will go in the same folder:
          docx2pdf myfolder/

      Convert single docx file with explicit output filepath:
          docx2pdf input.docx output.pdf

      Convert single docx file and output to a different explicit folder:
          docx2pdf input.docx output_dir/

      Batch convert docx folder. Output PDFs will go to a different explicit folder:
          docx2pdf input_dir/ output_dir/
      """
      )

      def formatter_class(prog):
          # Raw formatter keeps the example block's line breaks as written.
          return argparse.RawDescriptionHelpFormatter(prog, max_help_position=32)

      parser = argparse.ArgumentParser(
          description=description, formatter_class=formatter_class
      )
      parser.add_argument(
          "input",
          help="input file or folder. batch converts entire folder or convert single file",
      )
      parser.add_argument("output", nargs="?", help="output file or folder")
      parser.add_argument(
          "--keep-active",
          action="store_true",
          default=False,
          help="prevent closing word after conversion",
      )

      # No arguments at all: show usage instead of an argparse error.
      if len(sys.argv) == 1:
          parser.print_help()
          sys.exit(0)

      args = parser.parse_args()
      convert(args.input, args.output, args.keep_active)
  
  # Run the command-line interface only when this file is executed as a
  # script, not when it is imported as a module.
  if __name__ == "__main__":
      cli()

I call my script with this: python .\\w2PDF .\\word.docx .\\word.pdf

I get this on windows service output:

Traceback (most recent call last):\r\n File \"<frozen runpy>\", line 198, in _run_module_as_main\r\n File \"<frozen runpy>\", line 88, in _run_code\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\__main__.py\", line 3, in <module>\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 127, in cli\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 76, in convert\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 41, in windows\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\packages\\comtypes\\_meta.py\", line 14, in _wrap_coclass\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\packages\\comtypes\\_post_coinit\\unknwn.py\", line 520, in QueryInterface\r\nValueError: NULL COM pointer access

I couldn't find the cause. When I change the service's log-on account to Network Service or Local Service, the error above changes to "Access is denied":

Traceback (most recent call last):\r\n File \"<frozen runpy>\", line 198, in _run_module_as_main\r\n File \"<frozen runpy>\", line 88, in _run_code\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\__main__.py\", line 3, in <module>\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 127, in cli\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 76, in convert\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\Word2PDF_Python.py\", line 26, in windows\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\packages\\comtypes\\client\\__init__.py\", line 273, in CreateObject\r\n File \"D:\\Word2PDF-Service\\WindowsService1\\bin\\Debug\\Word2PDF\\packages\\comtypes\\_post_coinit\\misc.py\", line 149, in CoCreateInstance\r\n File \"_ctypes/callproc.c\", line 1008, in GetResult\r\nPermissionError: [WinError -2147024891] Access is denied

parsasaei avatar Oct 05 '24 05:10 parsasaei

Hi,

First of all, it should be mentioned that MS Office is generally not suitable for server-side automation.

I am particularly concerned about whether Word is installed in that Windows environment. Additionally, I think it might be necessary to pass the machine argument to CreateObject.

junkmd avatar Oct 05 '24 06:10 junkmd

@junkmd HI, Word is installed. About passing machine to CreateObject what change I can do on the code?

parsasaei avatar Oct 05 '24 06:10 parsasaei

I passed machine parameter, but still get that error.

parsasaei avatar Oct 05 '24 06:10 parsasaei

I think this is more of a technical issue related to COM rather than comtypes or Python.

Do you have any references for what you’re trying to do?

junkmd avatar Oct 05 '24 07:10 junkmd

I'm not sure I understood what you meant by references, but the remote procedure call setup I used follows this sample: https://www.c-sharpcorner.com/article/getting-started-with-remote-procedure-call/ After the server receives the request from the client, it calls my C# method, which runs the Python script:

/// <summary>
/// Runs the bundled "Word2PDF" Python script with the given command-line
/// arguments and returns everything it wrote to stdout and stderr.
/// </summary>
/// <param name="arguments">
/// Arguments appended verbatim to the Python command line.
/// NOTE(review): this string comes from the RPC client and is not validated
/// here; confirm the caller restricts it to the expected file paths.
/// </param>
/// <returns>
/// The captured standard output and standard error. Error also contains the
/// exception text when the process could not be started or awaited.
/// </returns>
public Word2PDFResult word2pdfConvert(string arguments)
{
    var file = "Word2PDF";
    var fileName = System.IO.Directory.GetFiles(Path.GetDirectoryName(System.Reflection.Assembly.GetEntryAssembly().Location), "*" + file, System.IO.SearchOption.AllDirectories).FirstOrDefault();
    var pythonPath = "C:\\Python\\Python312\\Python";

    var output = new StringBuilder();
    var error = new StringBuilder();

    if (fileName == null)
    {
        // Without this guard Python would be started with no script path and
        // would treat the first user argument as the script to run.
        error.AppendLine("Script '" + file + "' was not found under the entry assembly's directory.");
        return new Word2PDFResult { Error = error.ToString(), Output = output.ToString() };
    }

    using (var process = new Process())
    {
        var started = false;
        try
        {
            process.StartInfo.FileName = pythonPath;
            // Quote the script path so a directory name containing spaces
            // is not split into several arguments.
            process.StartInfo.Arguments = "\"" + fileName + "\" " + arguments;
            process.StartInfo.UseShellExecute = false;
            process.StartInfo.RedirectStandardOutput = true;
            process.StartInfo.RedirectStandardError = true;
            process.StartInfo.CreateNoWindow = true;

            // e.Data == null marks end-of-stream; nothing to record then.
            process.OutputDataReceived += (s, e) =>
            {
                if (e.Data != null)
                {
                    output.AppendLine(e.Data);
                }
            };
            process.ErrorDataReceived += (s, e) =>
            {
                if (e.Data != null)
                {
                    // Handlers run on another thread; the catch block below
                    // may append to the same builder.
                    lock (error)
                    {
                        error.AppendLine(e.Data);
                    }
                }
            };

            started = process.Start();
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
            // The parameterless overload also waits for the redirected
            // streams to be drained, so no extra wait handles are needed.
            process.WaitForExit();
        }
        catch (Exception ex)
        {
            // Report the failure to the caller instead of swallowing it.
            lock (error)
            {
                error.AppendLine(ex.ToString());
            }

            // Kill() throws when the process never started or has already
            // exited, so only attempt it for a live process.
            if (started)
            {
                try
                {
                    if (!process.HasExited)
                    {
                        process.Kill();
                    }
                }
                catch (InvalidOperationException)
                {
                    // The process exited between the check and the kill.
                }
                catch (System.ComponentModel.Win32Exception)
                {
                    // The process is terminating or could not be terminated.
                }
            }
        }
    }

    return new Word2PDFResult { Error = error.ToString(), Output = output.ToString() };
}

parsasaei avatar Oct 05 '24 08:10 parsasaei

Before I built the RPC service I mentioned, I launched the process directly from IIS, and when no user was logged on to the remote server, I got this error:

Traceback (most recent call last): File "", line 198, in _run_module_as_main File "", line 88, in _run_code File "D:\Word2PDF_Python\Word2PDFS\__main__.py", line 3, in File "D:\Word2PDF_Python\Word2PDFS\Word2PDF_Python.py", line 123, in cli File "D:\Word2PDF_Python\Word2PDFS\Word2PDF_Python.py", line 72, in convert File "D:\Word2PDF_Python\Word2PDFS\Word2PDF_Python.py", line 23, in windows File "D:\Word2PDF_Python\Word2PDFS\packages\comtypes\client\__init__.py", line 273, in CreateObject File "D:\Word2PDF_Python\Word2PDFS\packages\comtypes\_post_coinit\misc.py", line 149, in CoCreateInstance File "_ctypes/callproc.c", line 1008, in GetResult OSError: [WinError -2147467238] The server process could not be started because the configured identity is incorrect. Check the username and password

When I was logged on to the remote server, the client request succeeded, which means an administrator has to be logged on to the remote server before a web client's request can go through.

parsasaei avatar Oct 05 '24 08:10 parsasaei

To be honest, I currently have no detailed knowledge of RPC.

Recently, I had the opportunity to read some old COM technical documents, but I haven't come up with a modern way to use RPC.

Since this is a topic that requires help from the community, I have added the appropriate label. Please feel free to comment on this issue.

junkmd avatar Oct 05 '24 12:10 junkmd

I was able to reproduce this issue. But it seems that this issue is not related to comtypes, as I could reproduce the same issue with a program written in C++.

Word not working well in SYSTEM account

Seems that Word has some weird behaviors when launched in the SYSTEM account. You can launch an instance of the Word application just fine. However, when you call Documents.Open to open a file, it returns S_OK indicating success, but actually returns a NULL document pointer! What a "Task failed successfully" moment. And the document really failed to open, as you can check Documents.Count and it will return zero.

The S_OK result is enough to trick the program into thinking that the operation succeeded though, so when accessing the document pointer, it will crash.

That's what I observed with my C++ program, and for the Python program there might be a different error.

But the conclusion: Word shouldn't be used under the SYSTEM account.

CreateObject("Word.Application") in service accounts is blocked by DCOM

As for why Word failed to launch at all in Local Service or Network Service, I think that's because DCOM (Distributed COM), by default, only allows interactively logged on users and/or administrators to launch a DCOM server, for example, Word.

The SYSTEM account has administrator's privilege, so it can launch Word. Any interactively logged on user can also launch Word. But Local Service and Network Service are neither interactively logged on users nor administrators, so creating an instance of Word.Application fails with "access denied".

Launch winword.exe, then grab the active instance

But wait, all hope is not lost!

Although DCOM doesn't let us create instances of Word.Application, we still have other ways to launch Word. For example, we can get the Word installation path from the registry, then launch a Word process from that path directly, just like what a normal user would do.

This doesn't give us a COM object to control that instance of Word. But we can use comtypes.client.GetActiveObject("Word.Application") to get the currently running Word instance.

If the script is running in a service account, there wouldn't normally be any other Word instances running, so the instance we get would usually be the instance we just launched. We can't ensure that's the case when running under a normal user though, so normally we should still try creating a new Word instance in the normal way to avoid accidentally messing with other open Word instances.

Note that Word can take some time to load, and GetActiveObject("Word.Application") will fail before Word is completely loaded. We can retry this multiple times, until we successfully get an active instance.

Code that should work in service accounts

import sys
import json
import subprocess
from pathlib import Path
import os
# from ctypes import windll
import winreg

# SYS Import Local Packages Installed
path = os.path.realpath(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
sys.path.append(os.path.join(os.path.dirname(__file__),"."))
sys.path.append(os.path.join(os.path.dirname(__file__),"packages"))
# from packages import comtypes
from comtypes import client
from time import sleep
# COINIT_MULTITHREADED = 0x0
E_ACCESSDENIED = -2147024891
MK_E_UNAVAILABLE = -2147221021


def get_word_path():
    """Return the WINWORD.EXE path registered under "App Paths", or None.

    Looks at the default value of the ``App Paths`` entry for WINWORD.EXE
    in HKEY_LOCAL_MACHINE, first in the native registry location and then
    under ``WOW6432Node``.

    Returns:
        The registered path, or ``None`` when neither key provides one.
    """
    subkeys = (
        # 64-bit Office on 64-bit Windows or 32-bit Office on 32-bit Windows
        r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\WINWORD.EXE",
        # 32-bit Office on 64-bit Windows
        r"SOFTWARE\WOW6432Node\Microsoft\Windows\CurrentVersion\App Paths\WINWORD.EXE",
    )
    for subkey in subkeys:
        try:
            # The context manager closes the registry handle, which was
            # previously left open.
            with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, subkey) as key:
                value, _ = winreg.QueryValueEx(key, "")
        except FileNotFoundError:
            # Key (or its default value) is missing; try the next location.
            continue
        return value
    return None


def launch_word(timeout=60.0):
    """Start Word as a separate process and return the active Word object.

    Word is launched directly from its installation path and then picked
    up with ``client.GetActiveObject``, retrying once per second while
    that call reports ``MK_E_UNAVAILABLE``.

    Args:
        timeout: Maximum number of seconds to wait for the Word instance
            to become available. ``None`` waits indefinitely, which was the
            previous behaviour.

    Returns:
        The object returned by ``client.GetActiveObject("Word.Application")``.

    Raises:
        FileNotFoundError: If ``get_word_path`` finds no Word installation.
        TimeoutError: If Word is still unavailable after ``timeout`` seconds.
        OSError: Any other error raised by ``GetActiveObject``.
    """
    from time import monotonic

    word_path = get_word_path()
    if word_path is None:
        # Popen([None, ...]) would otherwise fail with an unrelated TypeError.
        raise FileNotFoundError(
            "WINWORD.EXE is not registered under 'App Paths'; is Word installed?"
        )

    # Launch Word process
    subprocess.Popen(
        [word_path, "-Embedding"],  # Add -Embedding to run Word as a COM server silently
        stdin=None, stdout=None, stderr=None, close_fds=True)

    deadline = None if timeout is None else monotonic() + timeout
    while True:
        try:
            # Get the current active Word instance
            return client.GetActiveObject("Word.Application")
        except OSError as err:
            if err.winerror != MK_E_UNAVAILABLE:
                raise
            # Word not ready yet; give up once the deadline has passed so a
            # service does not hang forever if Word never becomes available.
            if deadline is not None and monotonic() >= deadline:
                raise TimeoutError(
                    f"Word did not become available within {timeout} seconds"
                ) from err
        sleep(1)


def _save_as_pdf(word, docx_filepath, pdf_filepath, pdf_format=17):
    """Open one document in *word*, save it as PDF, and always close it.

    ``pdf_format`` defaults to 17, the value the script passes as
    ``FileFormat`` for PDF output (``wdFormatPDF``).
    """
    doc = word.Documents.Open(str(docx_filepath))
    try:
        doc.SaveAs(str(pdf_filepath), FileFormat=pdf_format)
    finally:
        # Close even when SaveAs fails so the document is not left open
        # inside Word. The argument 0 is the same value the script has
        # always passed to Close().
        doc.Close(0)


def windows(paths, keep_active):
    """Convert Word documents to PDF by automating Word through COM.

    Word is created with ``client.CreateObject``; if that fails with
    ``E_ACCESSDENIED`` the function falls back to ``launch_word()``.

    Args:
        paths: Mapping with the keys ``"batch"``, ``"input"`` and
            ``"output"``. When ``paths["batch"]`` is true, ``"input"`` and
            ``"output"`` are folders; otherwise they are the source
            document and the target PDF file.
        keep_active: When true, Word is left running after the conversion;
            otherwise ``word.Quit()`` is called, even if a conversion fails.

    Raises:
        OSError: If Word cannot be created for a reason other than
            ``E_ACCESSDENIED``.
    """
    try:
        word = client.CreateObject("Word.Application")
    except OSError as err:
        if err.winerror != E_ACCESSDENIED:
            raise
        # If we don't have DCOM launch permission,
        # try launching Word manually
        word = launch_word()

    try:
        if paths["batch"]:
            input_dir = Path(paths["input"])
            # Path.glob() returns a generator, which is always truthy, so
            # "glob('*.docx') or glob('*.doc')" could never reach the *.doc
            # fallback. Materialise each result so an empty list falls through.
            docx_files = sorted(input_dir.glob("*.docx")) or sorted(input_dir.glob("*.doc"))
            for i, docx_filepath in enumerate(docx_files):
                pdf_filepath = Path(paths["output"]) / (str(docx_filepath.stem) + ".pdf")
                print(f"Converting {docx_filepath} to {pdf_filepath} ({i+1}/{len(docx_files)})")
                _save_as_pdf(word, docx_filepath, pdf_filepath)
        else:
            docx_filepath = Path(paths["input"]).resolve()
            pdf_filepath = Path(paths["output"]).resolve()
            print(f"Converting {docx_filepath} to {pdf_filepath}")
            _save_as_pdf(word, docx_filepath, pdf_filepath)
    finally:
        # Without this, a failed conversion would leave a Word process
        # running in the background.
        if not keep_active:
            word.Quit()

def resolve_paths(input_path, output_path):
    """Turn the raw input/output arguments into a conversion plan.

    Args:
        input_path: A ``.docx``/``.doc`` file, or a folder (batch mode).
        output_path: Target PDF file or folder. A falsy value means "write
            next to the input".

    Returns:
        dict with the keys ``"batch"`` (bool), ``"input"`` (str) and
        ``"output"`` (str). Paths are absolute.

    Raises:
        AssertionError: If a batch output is not an existing folder, the
            input file is not ``.docx``/``.doc``, or an explicit output file
            does not end in ``.pdf``.
    """
    input_path = Path(input_path).resolve()
    output_path = Path(output_path).resolve() if output_path else None

    # The checks below raise AssertionError explicitly instead of using
    # ``assert``: the exception type stays the same for callers, but the
    # validation is no longer stripped when Python runs with -O.
    if input_path.is_dir():
        if output_path is None:
            output_path = input_path
        elif not output_path.is_dir():
            raise AssertionError(f"batch output must be an existing folder: {output_path}")
        return {"batch": True, "input": str(input_path), "output": str(output_path)}

    # Compare extensions case-insensitively so "REPORT.DOCX" is accepted.
    if not str(input_path).lower().endswith((".docx", ".doc")):
        raise AssertionError(f"input must be a .docx or .doc file: {input_path}")

    pdf_name = str(input_path.stem) + ".pdf"
    if output_path is None:
        output_path = input_path.parent / pdf_name
    elif output_path.is_dir():
        output_path = output_path / pdf_name
    elif not str(output_path).lower().endswith(".pdf"):
        raise AssertionError(f"output file must end in .pdf: {output_path}")
    # Always hand back strings; previously some branches leaked Path objects.
    return {"batch": False, "input": str(input_path), "output": str(output_path)}

def convert(input_path, output_path=None, keep_active=False):
    """Resolve the given paths, then convert the document(s) to PDF.

    ``input_path`` and ``output_path`` are passed to ``resolve_paths``;
    the resulting plan and ``keep_active`` are passed on to ``windows``.
    """
    resolved = resolve_paths(input_path, output_path)
    windows(resolved, keep_active)

def cli():
    """Parse the command line and run the conversion.

    Prints the help text and exits with status 0 when the script is started
    without arguments; otherwise the parsed ``input``, ``output`` and
    ``--keep-active`` values are forwarded to ``convert``.
    """
    import textwrap
    import argparse

    # The explicit-output example must name a .pdf target: resolve_paths
    # rejects an output file that does not end in ".pdf".
    description = textwrap.dedent(
        """
    Example Usage:

    Convert single docx file in-place from myfile.docx to myfile.pdf:
        docx2pdf myfile.docx

    Batch convert docx folder in-place. Output PDFs will go in the same folder:
        docx2pdf myfolder/

    Convert single docx file with explicit output filepath:
        docx2pdf input.docx output.pdf

    Convert single docx file and output to a different explicit folder:
        docx2pdf input.docx output_dir/

    Batch convert docx folder. Output PDFs will go to a different explicit folder:
        docx2pdf input_dir/ output_dir/
    """
    )

    def formatter_class(prog):
        # Raw formatter keeps the example block's line breaks as written.
        return argparse.RawDescriptionHelpFormatter(prog, max_help_position=32)

    parser = argparse.ArgumentParser(
        description=description, formatter_class=formatter_class
    )
    parser.add_argument(
        "input",
        help="input file or folder. batch converts entire folder or convert single file",
    )
    parser.add_argument("output", nargs="?", help="output file or folder")
    parser.add_argument(
        "--keep-active",
        action="store_true",
        default=False,
        help="prevent closing word after conversion",
    )

    # No arguments at all: show usage instead of an argparse error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()
    convert(args.input, args.output, args.keep_active)

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli()

Note that in a service account, this script will use the active Word instance, so it might not work well when multiple instances of Word or multiple instances of the script run simultaneously under the same user account.

Other ways?

If you do need to run multiple instances simultaneously, we may try other ways.

Although by default DCOM blocks CreateObject("Word.Application") in service accounts, we can change the DCOM permission setting to allow this to happen.

If you are interested in this, we can discuss it further.

gexgd0419 avatar Jul 27 '25 14:07 gexgd0419