Python Examples¶
This section contains simple and advanced examples using the Python Kompute class. For an overview of the module, see the Python Package Overview; for a deep dive into its functions, see the Python Class Reference section.
You will be able to run the examples below by installing the dependencies in python/test/requirements-dev.txt
Python Example (Simple)¶
Once the dependencies are installed, you can interact with Kompute from your Python interpreter. Below is the same sample as “Your First Kompute (Simple Version)” above, but in Python:
from kp import Manager, Tensor, OpTensorSyncDevice, OpTensorSyncLocal, OpAlgoDispatch
from pyshader import python2shader, ivec3, f32, Array


# The shader can be defined via PyShader (as here), or supplied directly
# as a glsl string or spirv bytes.
@python2shader
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
                            data1=("buffer", 0, Array(f32)),
                            data2=("buffer", 1, Array(f32)),
                            data3=("buffer", 2, Array(f32))):
    i = index.x
    data3[i] = data1[i] * data2[i]


mgr = Manager()

# Tensors can be initialized with List[] or np.Array
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out = mgr.tensor([0, 0, 0])

# The same three tensors are first synced to the device and then bound,
# in this order, to shader buffers 0, 1 and 2.
params = [tensor_in_a, tensor_in_b, tensor_out]

sequence = mgr.sequence()
sequence.eval(OpTensorSyncDevice(params))

algo = mgr.algorithm(params, compute_shader_multiply.to_spirv())

# Run the shader synchronously, then bring the result back to the host
sequence.eval(OpAlgoDispatch(algo))
sequence.eval(OpTensorSyncLocal([tensor_out]))

assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
Python Example (Extended)¶
Similarly, below is the Python version of the extended example above:
from kp import Manager, Tensor
import kp
from pyshader import python2shader, ivec3, f32, Array


# The shader can be defined via PyShader (as here), or supplied directly
# as a glsl string or spirv bytes.
@python2shader
def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
                            data1=("buffer", 0, Array(f32)),
                            data2=("buffer", 1, Array(f32)),
                            data3=("buffer", 2, Array(f32))):
    i = index.x
    data3[i] = data1[i] * data2[i]


# NOTE(review): presumably device index 0 with queue family index 2 --
# confirm against the Manager class reference for your hardware.
mgr = Manager(0, [2])

# Tensors can be initialized with List[] or np.Array
tensor_in_a = mgr.tensor([2, 2, 2])
tensor_in_b = mgr.tensor([1, 2, 3])
tensor_out = mgr.tensor([0, 0, 0])
params = [tensor_in_a, tensor_in_b, tensor_out]

seq = mgr.sequence()
seq.eval(kp.OpTensorSyncDevice(params))

algo = mgr.algorithm(params, compute_shader_multiply.to_spirv())

# Run the shader operation asynchronously and then await it
seq.eval_async(kp.OpAlgoDispatch(algo))
seq.eval_await()

# Record one sync-to-local operation per tensor, then evaluate them together
for tensor in params:
    seq.record(kp.OpTensorSyncLocal([tensor]))
seq.eval()

assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
Kompute Operation Capabilities¶
Multiple processing capabilities can be handled by loading compute shaders into separate sequences. The example below shows how this can be done:
from kp import Manager
import kp

# We'll assume we have the shader data available
from my_spv_shader_data import mult_shader, sum_shader

mgr = Manager()

t1 = mgr.tensor([2, 2, 2])
t2 = mgr.tensor([1, 2, 3])
t3 = mgr.tensor([1, 2, 3])

# Upload every tensor the shaders touch to the device before dispatching.
# (The earlier version ran OpTensorSyncLocal on t1 and t3 only, which copies
# device -> host and never uploads t2.)
mgr.sequence().eval(kp.OpTensorSyncDevice([t1, t2, t3]))

# Create multiple separate sequences
sq_mult = mgr.sequence()
sq_sum = mgr.sequence()
sq_sync = mgr.sequence()

# Each sequence records its commands once so it can be re-evaluated cheaply.
# The tensor order in each list is the shader's buffer binding order.
sq_mult.record(kp.OpAlgoDispatch(mgr.algorithm([t1, t2, t3], mult_shader)))
sq_sum.record(kp.OpAlgoDispatch(mgr.algorithm([t3, t2, t1], sum_shader)))
sq_sync.record(kp.OpTensorSyncLocal([t1, t3]))

# Run multiple iterations
for _ in range(10):
    sq_mult.eval()
    sq_sum.eval()

# Bring the tensors written by the shaders back to the host
sq_sync.eval()

print(t1.data(), t2.data(), t3.data())
Machine Learning Logistic Regression Implementation¶
Similar to the logistic regression implementation in the C++ examples section, below you can find the Python implementation of the Logistic Regression algorithm.
from kp import Manager, Tensor
import kp
from pyshader import python2shader, ivec3, f32, Array


# Per-sample gradient shader for logistic regression with two features.
# For invocation i it reads sample (x_i[i], x_j[i]) with label y[i] plus the
# current weights/bias, and writes that sample's weight gradients, bias
# gradient and loss to the *_out buffers at index i.
# NOTE(review): vec2, exp and log are not imported here; presumably they are
# resolved by python2shader when the function is compiled -- confirm.
@python2shader
def compute_shader(
        index=("input", "GlobalInvocationId", ivec3),
        x_i=("buffer", 0, Array(f32)),
        x_j=("buffer", 1, Array(f32)),
        y=("buffer", 2, Array(f32)),
        w_in=("buffer", 3, Array(f32)),
        w_out_i=("buffer", 4, Array(f32)),
        w_out_j=("buffer", 5, Array(f32)),
        b_in=("buffer", 6, Array(f32)),
        b_out=("buffer", 7, Array(f32)),
        l_out=("buffer", 8, Array(f32)),
        M=("buffer", 9, Array(f32))):

    i = index.x

    m = M[0]

    w_curr = vec2(w_in[0], w_in[1])
    b_curr = b_in[0]

    x_curr = vec2(x_i[i], x_j[i])
    y_curr = y[i]

    # Forward pass: sigmoid(w . x + b)
    z_dot = w_curr @ x_curr
    z = z_dot + b_curr
    y_hat = 1.0 / (1.0 + exp(-z))

    # Gradients, each scaled by 1/m
    d_z = y_hat - y_curr
    d_w = (1.0 / m) * x_curr * d_z
    d_b = (1.0 / m) * d_z

    # Binary cross-entropy: -(y*log(y_hat) + (1 - y)*log(1 - y_hat)).
    # The second term uses (1 - y); it previously read (1 + y).
    loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))

    w_out_i[i] = d_w.x
    w_out_j[i] = d_w.y
    b_out[i] = d_b
    l_out[i] = loss


mgr = Manager()

# First we create input and output tensors for the shader
tensor_x_i = mgr.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
tensor_x_j = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])

tensor_y = mgr.tensor([0.0, 0.0, 0.0, 1.0, 1.0])

tensor_w_in = mgr.tensor([0.001, 0.001])
tensor_w_out_i = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])
tensor_w_out_j = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])

tensor_b_in = mgr.tensor([0.0])
tensor_b_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])

tensor_l_out = mgr.tensor([0.0, 0.0, 0.0, 0.0, 0.0])

tensor_m = mgr.tensor([5.0])

# We store them in an array for easier interaction; the order matches the
# shader's buffer bindings 0..9
params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
          tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]

# One-off upload of every tensor to the device. This must go through the
# manager: `sq` is not created until the next statement.
mgr.sequence().eval(kp.OpTensorSyncDevice(params))

# Record commands for efficient evaluation: upload the current weights and
# bias, run the shader, and download the per-sample gradients and losses
sq = mgr.sequence()
sq.record(kp.OpTensorSyncDevice([tensor_w_in, tensor_b_in]))
sq.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_shader.to_spirv())))
sq.record(kp.OpTensorSyncLocal([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]))

ITERATIONS = 100
learning_rate = 0.1

# Perform machine learning training and inference across all input X and Y
for i_iter in range(ITERATIONS):
    sq.eval()

    # Calculate the parameters based on the respective derivatives calculated
    w_in_i_val = tensor_w_in.data()[0]
    w_in_j_val = tensor_w_in.data()[1]
    b_in_val = tensor_b_in.data()[0]

    # Gradient-descent step: subtract every sample's contribution
    for j_iter in range(tensor_b_out.size()):
        w_in_i_val -= learning_rate * tensor_w_out_i.data()[j_iter]
        w_in_j_val -= learning_rate * tensor_w_out_j.data()[j_iter]
        b_in_val -= learning_rate * tensor_b_out.data()[j_iter]

    # Update the parameters to process inference again
    tensor_w_in.set_data([w_in_i_val, w_in_j_val])
    tensor_b_in.set_data([b_in_val])

# Sanity-check the learned parameters
assert tensor_w_in.data()[0] < 0.01
assert tensor_w_in.data()[0] > 0.0
assert tensor_w_in.data()[1] > 1.5
assert tensor_b_in.data()[0] < 0.7

# Print outputs
print(tensor_w_in.data())
print(tensor_b_in.data())