rxu rxu - 6 months ago 39
Python Question

Directly use Intel mkl library on Scipy sparse matrix

I want to call mkl.mkl_scsrmultcsr from python. The goal is to calculate a sparse matrix C in compressed sparse row format. Sparse matrix C is the matrix product between A and transpose of A, where A is also a sparse matrix in csr format.

The following doesn't work because the program didn't modify the C matrix.
How to fix that?

Here is an answer that works for another mkl function. In that answer, mkl function was faster by 4 times.

import scipy.sparse as spsp
import numpy as np
import ctypes
from ctypes import *
import multiprocessing as mp
# Load the share library
mkl = ctypes.cdll.LoadLibrary("libmkl_rt.so")

class ctype_csr_mat(object):
def __init__(self, arr_in, c_type=c_float, clear=False):
dtype_to_ctype = {}
dtype_to_ctype[c_float] = np.float32
dtype_to_ctype[c_double] = np.float64
dtype_to_ctype[c_int] = np.int16
dtype = dtype_to_ctype[c_type]

self.csr = spsp.csr_matrix(arr_in, dtype=dtype)
# Cannot use .astype method on self.csr
# after this line, or converting to ctype
# array won't work as expected.

if clear == True:
self.csr.data[:] = 0
self.csr.indptr[:] = 0
self.csr.indices[:] = 0
self.c_data = self.csr.data.ctypes.data_as(POINTER(c_type)) #*self.csr.data.size
self.c_indptr = self.csr.indptr.ctypes.data_as (POINTER(c_int))
self.c_indices = self.csr.indices.ctypes.data_as(POINTER(c_int))

def show(self, indent=4):
sp = ' '*indent
to_print = [ self.csr.data,
self.c_data[0:self.csr.data.size],
self.c_indptr[0:self.csr.indptr.size],
self.c_indices[0:self.csr.indices.size] ]
name = ['csr data','c_data', 'c_indptr', 'c_indices']
for i in range(4):
print sp + name[i] + ': ' + str(to_print[i])

def mkl_dot(self,B, C, mkl_pass):
# calculate A.T * B and put answer in C
# by several passes.
assert type(B) == ctype_csr_mat
assert type(C) == ctype_csr_mat

request_list = [c_int(0),c_int(1), c_int(2)]
request1 = request_list[mkl_pass]; request = byref(request1)

(a, ja, ia) = (self.c_data, self.c_indptr, self.c_indices)
(b, jb, ib) = (B.c_data, B.c_indptr, B.c_indices)
(c, jc, ic) = (C.c_data, C.c_indptr, C.c_indices)
(mm, nn) = self.csr.shape
# Transpose matrix A before calculating
# matrix product.
trans1 = c_char('T') ; trans = byref(trans1)
sort1 = c_int(7) ; sort = byref(sort1)
m = byref(c_int(mm)) ; n = byref(c_int(nn)) ; k = byref(c_int(mm))
nzmax1 = c_int(1) ; nzmax = byref(nzmax1)
info1 = c_int(0) ; info = byref(info1)

mkl.mkl_scsrmultcsr(trans, request, sort,
m, n, k,
a, ja, ia,
b, jb, ib,
c, jc, ic,
nzmax, info)
return info1.value
def test():
# Make Test Data
Ao = [[1,0,0],
[1,1,1],
[0,0,1]]
AA = np.array(Ao, dtype=np.single) # save the original matrix
A = ctype_csr_mat(Ao)
B = A
C = ctype_csr_mat(np.ones((3,3)), clear=True)

def show_all(A,B,C):
print("A:")
A.show()
print("B:")
B.show()
print("C:")
C.show()

show_all(A,B,C)
for mkl_pass in [0,1,2]:
print "=== pass: %d ===" % (mkl_pass)
info = A.mkl_dot(B,C,mkl_pass)
print("info:")
print(" %d" % info)
show_all(A,B,C)

print "=== Correct answer ==="
title = ["A dot A.T:", "A dot A:"]
ans = [(AA.dot(AA.T)), (AA.dot(AA))]
for i in range(2):
ans_mat = ctype_csr_mat(ans[i])
print(title[i])
print(ans[i])
ans_mat.show()

test()


Result:

A:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
B:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
C:
csr data: [ 0. 0. 0. 0. 0. 0. 0. 0. 0.]
c_data: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
c_indptr: [0, 0, 0, 0]
c_indices: [0, 0, 0, 0, 0, 0, 0, 0, 0]
=== pass: 0 ===
info:
0
A:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
B:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
C:
csr data: [ 0. 0. 0. 0. 0. 0. 0. 0. 0.]
c_data: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
c_indptr: [0, 0, 0, 0]
c_indices: [1, 1, 1, 1, 0, 0, 0, 0, 0]
=== pass: 1 ===
info:
0
A:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
B:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
C:
csr data: [ 0. 0. 0. 0. 0. 0. 0. 0. 0.]
c_data: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
c_indptr: [0, 0, 0, 0]
c_indices: [1, 1, 1, 1, 0, 0, 0, 0, 0]
=== pass: 2 ===
info:
0
A:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
B:
csr data: [ 1. 1. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 1.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]
C:
csr data: [ 0. 0. 0. 0. 0. 0. 0. 0. 0.]
c_data: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
c_indptr: [0, 0, 0, 0]
c_indices: [1, 1, 1, 1, 0, 0, 0, 0, 0]
=== Correct answer ===
A dot A.T:
[[ 1. 1. 0.]
[ 1. 3. 1.]
[ 0. 1. 1.]]
csr data: [ 1. 1. 1. 3. 1. 1. 1.]
c_data: [1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0]
c_indptr: [0, 2, 5, 7]
c_indices: [0, 1, 0, 1, 2, 1, 2]
A dot A:
[[ 1. 0. 0.]
[ 2. 1. 2.]
[ 0. 0. 1.]]
csr data: [ 1. 2. 1. 2. 1.]
c_data: [1.0, 2.0, 1.0, 2.0, 1.0]
c_indptr: [0, 1, 4, 5]
c_indices: [0, 0, 1, 2, 2]

Answer

Look at the Python code for the scipy sparse product. Notice that it calls the compiled code in 2 passes.

It looks like the mkl code does the same thing

https://software.intel.com/en-us/node/468640

If request=1, the routine computes only values of the array ic of length m + 1, the memory for this array must be allocated beforehand. On exit the value ic(m+1) - 1 is the actual number of the elements in the arrays c and jc.

If request=2, the routine has been called previously with the parameter request=1, the output arrays jc and c are allocated in the calling program and they are of the length ic(m+1) - 1 at least.

You first allocated ic based on the number of rows of C (you know that from the inputs), and call the mkl code with request=1.

For request=2 you have to allocate c and jc arrays, based on the size in ic(m+1) - 1. This is not the same as the number of nnz in the input arrays.

You are using request1 = c_int(0), which requires that the c arrays be the correct size - which you don't know without actually performing the dot (or a request 1).

==================

File:        /usr/lib/python3/dist-packages/scipy/sparse/compressed.py
Definition:  A._mul_sparse_matrix(self, other)

pass 1 allocates indptr (note size), and passes the pointers (data doesn't matter at this pass)

    indptr = np.empty(major_axis + 1, dtype=idx_dtype)

    fn = getattr(_sparsetools, self.format + '_matmat_pass1')
    fn(M, N,
       np.asarray(self.indptr, dtype=idx_dtype),
       np.asarray(self.indices, dtype=idx_dtype),
       np.asarray(other.indptr, dtype=idx_dtype),
       np.asarray(other.indices, dtype=idx_dtype),
       indptr)

    nnz = indptr[-1]

pass 2 allocates indptr (different size), and based on nnz indices and data.

    indptr = np.asarray(indptr, dtype=idx_dtype)
    indices = np.empty(nnz, dtype=idx_dtype)
    data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))

    fn = getattr(_sparsetools, self.format + '_matmat_pass2')
    fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
       np.asarray(self.indices, dtype=idx_dtype),
       self.data,
       np.asarray(other.indptr, dtype=idx_dtype),
       np.asarray(other.indices, dtype=idx_dtype),
       other.data,
       indptr, indices, data)

Last make a new array using these arrays.

    return self.__class__((data,indices,indptr),shape=(M,N))

The mkl library should be used in the same way.

===================

https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h

has c code for csr_matmat_pass1 and csr_matmat_pass2