kivy-ios
kivy-ios copied to clipboard
[WIP] OpenBLAS and Numpy
During the last ~2 months I got to what seems to be a working port of flang for iOS, but I still have to figure out how to make it work with numpy and kivy.
Here are the steps to get where I'm at, first build flang and openblas:
- Build flang for iOS with these instructions. Build the version with embedded bitcode; feel free to ask questions here if it's not clear how.
- Build OpenBLAS with lapack for iOS: Clone openblas, save this script and run from within the repo. Everything will be installed in this same directory under the "INSTALL" folder.
Make a kivy project:
-
mkdir kivy-OpenBLAS && cd kivy-OpenBLAS
-
python -m venv venv && . venv/bin/activate
-
git clone https://github.com/kivy/kivy-ios.git && pip install -e kivy-ios
You can make modifications in the kivy-ios/
folder and they will affect the installed kivy-ios.
Create a new project as per usual:
- Create a test-app folder and put a hello world main.py in it;
-
toolchain build python3 kivy
Copy the openblas files to dist/hostpython3/:
- Copy
INSTALL/lib/libopenblas_armv8p-r0.3.19.dev.a
from the openblas folder to dist/hostpython3/lib/libopenblas.a
. Note that you need to rename it.
- Copy everything in
INSTALL/include
to dist/hostpython3/include
So far openblas is only built for iOS; therefore you will need to use toolchain build numpy --arch arm64
to test numpy.
To make changes to the numpy recipe simply edit kivy-ios/kivy_ios/recipes/numpy/__init__.py
.
My currently not working numpy recipe
from kivy_ios.toolchain import CythonRecipe
from os.path import join
import sh
import shutil
class NumpyRecipe(CythonRecipe):
    """kivy-ios recipe for numpy 1.20.2 that selects OpenBLAS for BLAS/LAPACK.

    The Fortran compiler is set to a locally installed flang (see
    ``get_recipe_env``); the paths used there are specific to the author's
    machine.
    """

    version = "1.20.2"
    url = "https://pypi.python.org/packages/source/n/numpy/numpy-{version}.zip"
    library = "libnumpy.a"
    libraries = ["libnpymath.a", "libnpyrandom.a"]
    include_dir = "numpy/core/include"
    depends = ["python"]
    hostpython_prerequisites = ["Cython"]
    cythonize = False

    def prebuild_arch(self, arch):
        """Apply ``duplicated_symbols.patch`` unless the "patched" marker is set."""
        if self.has_marker("patched"):
            return
        self.apply_patch("duplicated_symbols.patch")
        self.set_marker("patched")

    def get_recipe_env(self, arch):
        """Return the parent build environment extended for numpy + OpenBLAS.

        ``arch`` is forwarded unchanged to the parent implementation.
        """
        env = super().get_recipe_env(arch)
        # CC must carry the CFLAGS (with the arm arch): numpy first tries to
        # compile and execute an empty C file to check that the compiler
        # works, which cannot succeed when cross-compiling.
        env["CC"] = "{} {}".format(env["CC"], env["CFLAGS"])
        # Fortran compiler. The original path read "/ur/local/bin/flang",
        # a typo that can never resolve; the intended prefix is "/usr".
        env["FC"] = "/usr/local/bin/flang -L/usr/local/flang-iOS"
        # Restrict the BLAS/LAPACK search to OpenBLAS so that
        # Accelerate.framework (reported as unsupported) is not picked up.
        env["NPY_BLAS_ORDER"] = "openblas"
        env["NPY_LAPACK_ORDER"] = "openblas"
        return env

    def build_arch(self, arch):
        """Run the parent build, then copy ``libnpy*.a`` into the build dir."""
        super().build_arch(arch)
        # The static helper libraries are produced under build/temp.*/.
        sh.cp(sh.glob(join(self.build_dir, "build", "temp.*", "libnpy*.a")),
              self.build_dir)

    def reduce_python_package(self):
        """Delete headers, tests, docs and distutils from the installed numpy.

        Directories are removed in the order listed; ``shutil.rmtree`` raises
        if one of them does not exist.
        """
        dest_dir = join(self.ctx.site_packages_dir, "numpy")
        prunable = (
            ("core", "include"),
            ("core", "tests"),
            ("distutils",),
            ("doc",),
            ("f2py", "tests"),
            ("fft", "tests"),
            ("lib", "tests"),
            ("linalg", "tests"),
            ("ma", "tests"),
            ("matrixlib", "tests"),
            ("polynomial", "tests"),
            ("random", "tests"),
            ("tests",),
        )
        for parts in prunable:
            shutil.rmtree(join(dest_dir, *parts))
recipe = NumpyRecipe()
All input is welcome!
This recipe seems to build successfully but there are still some things to be worked out.
from kivy_ios.toolchain import CythonRecipe
from os.path import join
import sh
import shutil
class NumpyRecipe(CythonRecipe):
    """kivy-ios recipe for numpy 1.20.2 with the flang runtime on the CC line.

    BLAS and LAPACK lookup is restricted to OpenBLAS through the
    ``NPY_BLAS_ORDER`` / ``NPY_LAPACK_ORDER`` environment variables.
    """

    version = "1.20.2"
    url = "https://pypi.python.org/packages/source/n/numpy/numpy-{version}.zip"
    library = "libnumpy.a"
    libraries = ["libnpymath.a", "libnpyrandom.a"]
    include_dir = "numpy/core/include"
    depends = ["python"]
    hostpython_prerequisites = ["Cython"]
    cythonize = False

    def prebuild_arch(self, arch):
        """Patch the numpy sources once; the "patched" marker records it."""
        if not self.has_marker("patched"):
            self.apply_patch("duplicated_symbols.patch")
            self.set_marker("patched")

    def get_recipe_env(self, arch):
        """Build on the parent environment, adding flang and OpenBLAS settings."""
        env = super().get_recipe_env(arch)

        # Library search path plus the default flang linker flags.
        flang_link_flags = " ".join((
            "-L/usr/local/flang-iOS/lib",
            "-lflang",
            "-lflangrti",
            "-lpgmath",
            "-lompstub",
            "-lm",
            "-lpthread",
            "-lSystem",
        ))
        # numpy probes the compiler by building (and running) an empty C
        # file, so the arch-specific CFLAGS have to be part of CC itself
        # when cross-compiling.
        env["CC"] = "{} {} {}".format(env["CC"], env["CFLAGS"], flang_link_flags)

        # Only look for OpenBLAS; this keeps Accelerate.framework (reported
        # as unsupported) out of the BLAS/LAPACK detection.
        for order_var in ("NPY_BLAS_ORDER", "NPY_LAPACK_ORDER"):
            env[order_var] = "openblas"
        return env

    def build_arch(self, arch):
        """Delegate to the parent build and collect the ``libnpy*.a`` archives."""
        super().build_arch(arch)
        archive_pattern = join(self.build_dir, "build", "temp.*", "libnpy*.a")
        sh.cp(sh.glob(archive_pattern), self.build_dir)

    def reduce_python_package(self):
        """Strip headers, docs, distutils and test suites from installed numpy."""
        numpy_root = join(self.ctx.site_packages_dir, "numpy")
        # Removal order matches the original straight-line sequence.
        for relative in (
            ("core", "include"),
            ("core", "tests"),
            ("distutils",),
            ("doc",),
            ("f2py", "tests"),
            ("fft", "tests"),
            ("lib", "tests"),
            ("linalg", "tests"),
            ("ma", "tests"),
            ("matrixlib", "tests"),
            ("polynomial", "tests"),
            ("random", "tests"),
            ("tests",),
        ):
            shutil.rmtree(join(numpy_root, *relative))
recipe = NumpyRecipe()
The flags -lflang -lflangrti -lpgmath -lompstub -lm -lpthread -lSystem
are the default linker flags for flang.
Problems:
- The vast majority of linkings don't need those flags so we see a lot of warnings.
- I'm passing the directory
-L/usr/local/flang-iOS/lib
but the flang libs should probably be somewhere under dist/
. I did try copying them to dist/hostpython3/lib/
and dist/lib
but it didn't work.
It seems the numpy setup.py or cython were trying to build a test binary but the linker was not receiving /usr/local/flang-iOS
or dist/hostpython3/lib
. Adding an -L flag solved this.
Now I'm getting a duplicated symbol _xerbla_
in the files: libnumpy.a (_multiarray_umath.cpython-39-darwin.so.o)
and libnumpy.a (lapack_lite.cpython-39-darwin.so.o)
. But lapack_lite shouldn't be built if openblas is available? Or should it?
I think I made progress:
duplicated_symbols.patch (numpy)
diff -Naur numpy-1.20.2.orig/numpy/linalg/setup.py numpy-1.20.2/numpy/linalg/setup.py
--- numpy-1.20.2.orig/numpy/linalg/setup.py 2021-04-04 11:04:17.000000000 +0200
+++ numpy-1.20.2/numpy/linalg/setup.py 2021-04-04 11:04:54.000000000 +0200
@@ -75,7 +75,7 @@
# umath_linalg module
config.add_extension(
'_umath_linalg',
- sources=['umath_linalg.c.src', get_lapack_lite_sources],
+ sources=['umath_linalg.c.src'],
depends=['lapack_lite/f2c.h'],
extra_info=lapack_info,
libraries=['npymath'],
diff -Naur numpy-1.20.2.orig/numpy/random/setup.py numpy-1.20.2/numpy/random/setup.py
--- numpy-1.20.2.orig/numpy/random/setup.py 2021-04-04 11:04:17.000000000 +0200
+++ numpy-1.20.2/numpy/random/setup.py 2021-04-04 11:05:22.000000000 +0200
@@ -127,7 +127,6 @@
config.add_extension('mtrand',
sources=['mtrand.c',
'src/legacy/legacy-distributions.c',
- 'src/distributions/distributions.c',
],
include_dirs=['.', 'src', 'src/legacy'],
libraries=['m'] if os.name != 'nt' else [],
diff --git a/numpy/linalg/setup.py b/numpy/linalg/setup.py
index 94536bb2c..c5b0d5fba 100644
--- a/numpy/linalg/setup.py
+++ b/numpy/linalg/setup.py
@@ -61,7 +61,7 @@ def get_lapack_lite_sources(ext, build_dir):
config.add_extension(
'lapack_lite',
- sources=['lapack_litemodule.c', get_lapack_lite_sources],
+ sources=['lapack_litemodule.c'],
depends=['lapack_lite/f2c.h'],
extra_info=lapack_info,
)
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index f6b31075d..f12c8b9e6 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -818,7 +818,7 @@ def gl_if_msvc(build_cmd):
# These files are also in MANIFEST.in so that they are always in
# the source distribution independently of HAVE_CBLAS.
common_src.extend([join('src', 'common', 'cblasfuncs.c'),
- join('src', 'common', 'python_xerbla.c'),
+ # join('src', 'common', 'python_xerbla.c'),
])
else:
extra_info = {}
__init__.py (numpy)
from kivy_ios.toolchain import CythonRecipe
from os.path import join
import sh
import shutil
class NumpyRecipe(CythonRecipe):
    """kivy-ios recipe for numpy 1.20.2 linked against flang and OpenBLAS.

    The flang/OpenBLAS static libraries are expected under
    ``<dist>/hostpython3/lib`` (they are copied there manually).
    """

    version = "1.20.2"
    url = "https://pypi.python.org/packages/source/n/numpy/numpy-{version}.zip"
    library = "libnumpy.a"
    libraries = ["libnpymath.a", "libnpyrandom.a"]
    include_dir = "numpy/core/include"
    depends = ["python"]
    hostpython_prerequisites = ["Cython"]
    cythonize = False

    def prebuild_arch(self, arch):
        """Apply ``duplicated_symbols.patch`` unless the "patched" marker is set."""
        if self.has_marker("patched"):
            return
        self.apply_patch("duplicated_symbols.patch")
        self.set_marker("patched")

    def get_recipe_env(self, arch):
        """Return the parent build environment extended for flang + OpenBLAS.

        ``arch`` is forwarded unchanged to the parent implementation.
        """
        env = super().get_recipe_env(arch)
        # Library directory inside the project's dist folder. The original
        # hard-coded one developer's absolute path
        # (/Users/tuco/Projects/kivy-OpenBLAS/dist/hostpython3/lib); deriving
        # it from the toolchain context makes the recipe usable elsewhere.
        # NOTE(review): assumes self.ctx exposes ``dist_dir`` -- confirm
        # against the kivy-ios toolchain Context.
        hostpython_lib_dir = join(self.ctx.dist_dir, "hostpython3", "lib")
        # CC must carry the CFLAGS (with the arm arch): numpy first tries to
        # compile and execute an empty C file to check that the compiler
        # works, which cannot succeed when cross-compiling. The flang runtime
        # libraries and -v (verbose driver output) are appended as before.
        env["CC"] = (
            "{} {} -lflang -lpgmath -lflangrti -lompstub -lm -lpthread "
            "-lSystem -v -L{}"
        ).format(env["CC"], env["CFLAGS"], hostpython_lib_dir)
        # Restrict the BLAS/LAPACK search to OpenBLAS so that
        # Accelerate.framework (reported as unsupported) is not picked up.
        env["NPY_BLAS_ORDER"] = "openblas"
        env["NPY_LAPACK_ORDER"] = "openblas"
        return env

    def build_arch(self, arch):
        """Run the parent build, then copy ``libnpy*.a`` into the build dir."""
        super().build_arch(arch)
        sh.cp(sh.glob(join(self.build_dir, "build", "temp.*", "libnpy*.a")),
              self.build_dir)

    def reduce_python_package(self):
        """Delete headers, tests, docs and distutils from the installed numpy.

        Directories are removed in the order listed; ``shutil.rmtree`` raises
        if one of them does not exist.
        """
        dest_dir = join(self.ctx.site_packages_dir, "numpy")
        prunable = (
            ("core", "include"),
            ("core", "tests"),
            ("distutils",),
            ("doc",),
            ("f2py", "tests"),
            ("fft", "tests"),
            ("lib", "tests"),
            ("linalg", "tests"),
            ("ma", "tests"),
            ("matrixlib", "tests"),
            ("polynomial", "tests"),
            ("random", "tests"),
            ("tests",),
        )
        for parts in prunable:
            shutil.rmtree(join(dest_dir, *parts))
recipe = NumpyRecipe()
But I don't know if just commenting out the source of _xerbla_
like that is the right approach? Ideally numpy should detect openblas and disable lapack_lite, right? (I think _xerbla_
comes from lapack_lite).
In any case numpy still can't find openblas when deployed to a phone.
I'm using this code to test.
import kivy
from kivy.app import App
from kivy.uix.label import Label
import numpy as np
# Replace this with your
# current version
kivy.require('1.11.1')
# Defining a class
class MyFirstKivyApp(App):
    """Minimal Kivy app that displays numpy's build configuration."""

    def build(self):
        """Return the root widget: a Label holding the ``np.show_config()`` report."""
        # Imported locally so the snippet relies only on the imports above it.
        import contextlib
        import io

        # np.show_config() prints its report and returns None, so the
        # original str(np.show_config()) always rendered the text "None".
        # Capture what it prints instead.
        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            np.show_config()
        report = captured.getvalue()
        # Still echo the report to the console, as the direct call did.
        print(report, end="")
        return Label(text=report)
# Here our class is initialized
# and its run() method is called.
# This initializes and starts
# our Kivy application.
MyFirstKivyApp().run()
It seems numpy can find openblas but it shows the host's path /Users/tuco/Projects/kivy-OpenBLAS/dist/hostpython3/lib
. I'm able to run some examples but I still don't know for sure if it can use openblas.
I don't yet have a recipe, I just drop the flang and openblas .a files under dist/hostpython3/lib
and dist/lib
.
If I just do that and run the project the benchmark finishes in ~1.5 seconds but if I drop the .a files in xcode the run time goes up to ~5.5 seconds. Very weird.
benchmark
import kivy
from kivy.app import App
from kivy.uix.label import Label
from random import random
import numpy as np
from time import time
# Replace this with your
# current version
kivy.require('1.11.1')
# Defining a class
class MyFirstKivyApp(App):
    """Kivy app that times a dense matrix product and shows the average."""

    def build(self):
        """Run the ``np.dot`` benchmark and return a Label with the timing."""
        np.show_config()

        # Fixed seed so every run multiplies the same matrices.
        np.random.seed(0)

        size = 4096
        lhs = np.random.random((size, size))
        rhs = np.random.random((size, size))
        # Inputs for further benchmarks, currently disabled:
        # C, D = np.random.random((size * 128,)), np.random.random((size * 128,))
        # E = np.random.random((int(size / 2), int(size / 4)))
        # F = np.random.random((int(size / 2), int(size / 2)))
        # F = np.dot(F, F.T)
        # G = np.random.random((int(size / 2), int(size / 2)))

        # Matrix multiplication, repeated and averaged.
        repeats = 20
        started = time()
        for _ in range(repeats):
            np.dot(lhs, rhs)
        elapsed = time() - started

        average = elapsed / repeats
        message = 'Dotted two %dx%d matrices in %0.2f s.' % (size, size, average)
        return Label(text=message)
# Here our class is initialized
# and its run() method is called.
# This initializes and starts
# our Kivy application.
MyFirstKivyApp().run()
I tried with pure numpy, size=1536
and it took ~8 seconds. (4096 took over 20 minutes and didn't finish).
The 5 seconds result from above was openblas and the 1.5 second must have been Accelerate.
Accelerate is included in the default kivy-ios project and even if numpy doesn't support it some stuff works and the symbols are there.
Calling linalg.svd and linalg.eig causes an EXC_BAD_ACCESS. I'm using the examples from here. I will post more information later.
I will try the examples on macos with openblas built with flang and hope I can replicate the crash.