[MIC learning notes] record several usages

into

With into, the value of one variable can be transferred into a different variable on the other side. For example, in(a : into(b)) assigns the value of variable a on the CPU to variable b on the MIC, and out(b : into(c)) returns the value of variable b on the MIC to variable c on the CPU. Note that into can only be used with in or out, not with inout or nocopy. The following is an example:

#include <stdio.h>

/*
 * Fill the first n elements of arr with consecutive integers:
 * arr[0] = start_num, arr[1] = start_num + 1, and so on.
 * Nothing is written when n is zero or negative.
 */
void init_array(int* arr, int n, int start_num){
    for (int offset = 0; offset < n; ++offset) {
        *(arr + offset) = start_num + offset;
    }
}

/*
 * Demonstrates the `into` modifier on an `in` clause.
 *
 * Host array p is named as the source of the transfer and p1 as its
 * destination; p1 is additionally listed in an `out` clause. Both arrays are
 * printed before the offload, inside the offload region, and after it, so the
 * effect of the transfer can be read off the output.
 */
void use_into() {
    int n = 3, i;
    int p[n], p1[n];

    // p starts as {0, 1, 2} and p1 as {3, 4, 5}.
    init_array(p, n, 0);
    init_array(p1, n, n);

    for(i = 0; i < n; i++) {
        printf("before offload:  p[%d] is %d\n", i, p[i]);
    }
    for(i = 0; i < n; i++) {
        printf("before offload: p1[%d] is %d\n", i, p1[i]);
    }

    printf("==============================\n");
    // `into` sends the host values of p into p1 on the coprocessor.
    // Author's observation: p itself holds no meaningful data on the MIC, only
    // p1 does; and because p1 is also an `out` variable, its original host
    // contents are overwritten once the offload finishes.
#pragma offload target(mic) in(p[0:n] : into(p1[0:n])) out(p1)
    {
        for(i = 0; i < n; i++) {
            printf("On Mic:  p[%d] is %d\n", i, p[i]);
        }
        for(i = 0; i < n; i++) {
            printf("On Mic: p1[%d] is %d\n", i, p1[i]);
        }
    }

    // Print the host copies again to show what came back from the offload.
    for(i = 0; i < n; i++) {
        printf("after offload:  p[%d] is %d\n", i, p[i]);
    }
    for(i = 0; i < n; i++) {
        printf("after offload: p1[%d] is %d\n", i, p1[i]);
    }

    printf("==============================\n");
}

/*
 * Demonstrates `into` when source and destination arrays differ in length.
 *
 * p has n elements, p1 has n+1 and p2 has n-1. p is transferred into p1 and
 * (trimmed to n-1 elements) into p2; p1 and p2 are copied back with `out`.
 * All three arrays are printed before, during, and after the offload.
 */
void use_into2() {
    int n = 4, i;
    int p[n], p1[n+1], p2[n-1];

    // p = {0..3}, p1 = {4..8}, p2 = {9, 10, 11}.
    init_array(p, n, 0);
    init_array(p1, n+1, n);
    init_array(p2, n-1, 2*n+1);

    for(i = 0; i < n; i++) {
        printf("before offload:	 p[%d] is %d\n", i, p[i]);
    }
    for(i = 0; i < n+1; i++) {
        printf("before offload: p1[%d] is %d\n", i, p1[i]);
    }
    for(i = 0; i < n-1; i++) {
        printf("before offload: p2[%d] is %d\n", i, p2[i]);
    }
    printf("==============================\n");
    // Case length(p) < length(p1): author's observation is that the surplus
    // tail of p1 is filled with 0.
    // Case length(p) > length(p2): the section sent with `in` must not be
    // longer than the destination, so only p[0:n-1] is transferred into p2.
#pragma offload target(mic) in(p[0:n]:into(p1[0:n+1])) in(p[0:n-1]:into(p2[0:n-1])) out(p1) out(p2)
    {
        for(i = 0; i < n; i++) {
            printf("on mic:  p[%d] is %d\n", i, p[i]);
        }
        for(i = 0; i < n+1; i++) {
            printf("on mic: p1[%d] is %d\n", i, p1[i]);
        }
        for(i = 0; i < n-1; i++) {
            printf("on mic: p2[%d] is %d\n", i, p2[i]);
        }
    }

    // Print the host copies again to show what came back from the offload.
    for(i = 0; i < n; i++) {
        printf("after offload:  p[%d] is %d\n", i, p[i]);
    }
    for(i = 0; i < n+1; i++) {
        printf("after offload: p1[%d] is %d\n", i, p1[i]);
    }
    for(i = 0; i < n-1; i++) {
        printf("after offload: p2[%d] is %d\n", i, p2[i]);
    }
    printf("==============================\n");
}

// Demonstrates `into` between a one-dimensional array and a two-dimensional
// array: p (n*n elements) is sent into a (n x n) on the way in, and a is sent
// back into p on the way out.
// Author's note: the documentation says this is not allowed, but it worked in
// the author's environment.
void use_into3() {
    int n = 10, i;
    int p[n * n];
    int a[n][n];

    // p = {0, 1, ..., n*n - 1}; a is left uninitialized on the host.
    init_array(p, n * n, 0);
#pragma offload target(mic)  in(p:into(a)) out(a:into(p))
    {
        // Print the first element of every row of a as seen on the coprocessor.
        for(i = 0; i < n; i++) {
            printf("on mic: a[%d][0] is %d\n", i, a[i][0]);

        }

        // Same flat position as p[0]
        a[0][0] = 1000;
        // Same flat position as p[10] (row 1, column 0, with n = 10)
        a[1][0] = 1000;
    }

    // Shows whether the two writes made to a inside the region reached p.
    printf("p[0] is %d and p[10] is %d\n", p[0], p[10]);
    printf("==============================\n");
}

/* Entry point: runs the first `into` demo; the other two are left disabled. */
int main(void) {
    use_into();

    /* Enable one of the calls below to try the other demos. */
    // use_into2();
    // use_into3();

    return 0;
}

alloc_if and free_if

For pointer variables, new memory is allocated on the MIC each time an offload executes, and that memory is released when the offload completes. To reuse the space created by an earlier offload, MIC provides alloc_if and free_if, which state explicitly whether new memory should be allocated for an offloaded pointer variable and whether that memory should be released after the offload (using alloc_if or free_if on a non-pointer variable is reported as an error). Their specific meanings are as follows:

  • alloc_if(1) - allocate new memory for the pointer when the offload starts
  • alloc_if(0) - do not allocate new memory when the offload starts; use the previously retained memory instead
  • free_if(1) - release the memory allocated for the pointer after the offload completes
  • free_if(0) - do not release the memory corresponding to the pointer after the offload completes

The defaults are alloc_if(1) and free_if(1). To make the program clearer, we predefine several macros:

/* Allocation on offload entry: ALLOC creates new memory, REUSE uses memory kept earlier. */
#define ALLOC  alloc_if(1)
#define REUSE  alloc_if(0)
/* Release on offload exit: FREE releases the memory, RETAIN keeps it. */
#define FREE   free_if(1)
#define RETAIN free_if(0)

The following is a specific example code:

#include <stdio.h>
#include <stdlib.h>

/* Allocation on offload entry: ALLOC creates new memory, REUSE uses memory kept earlier. */
#define ALLOC  alloc_if(1)
#define REUSE  alloc_if(0)
/* Release on offload exit: FREE releases the memory, RETAIN keeps it. */
#define FREE   free_if(1)
#define RETAIN free_if(0)

/*
 * Fill the first n elements of arr with consecutive integers:
 * arr[0] = start_num, arr[1] = start_num + 1, and so on.
 * Nothing is written when n is zero or negative.
 */
void init_array(int* arr, int n, int start_num){
    for (int offset = 0; offset < n; ++offset) {
        *(arr + offset) = start_num + offset;
    }
}

/*
 * Demonstrates what happens when alloc_if(0) (REUSE) is requested although no
 * memory has been retained on the coprocessor beforehand.
 *
 * The author reports that the offload fails with
 * "offload error: cannot find data associated with pointer variable 0x15e2c60",
 * because there is no existing memory to reuse.
 *
 * Returns without offloading if the host allocation fails.
 */
void reuse_before_alloc() {
    int n = 10;
    int i;
    int *p = calloc(n, sizeof *p);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p == NULL) {
        fprintf(stderr, "reuse_before_alloc: calloc failed\n");
        return;
    }
    init_array(p, n, 0);

    // Intentionally wrong: REUSE without a prior RETAIN. The transfer length
    // follows n, matching the other demos, instead of repeating the literal 10.
#pragma offload target(mic) in(p:length(n) REUSE)
    {
        for(i = 0; i < n; i++) {
            printf("the p[%d] id %d\n", i, p[i]);
        }
    }
    free(p);
}

/*
 * Offloads a 10-element array with free_if(0) (RETAIN) so the memory created
 * on the coprocessor is kept after the offload. Run this before reuse() or
 * reuse2(), which expect that retained memory to exist.
 *
 * Returns without offloading if the host allocation fails.
 *
 * NOTE(review): the host buffer is freed at the end while the coprocessor copy
 * is kept. A later REUSE presumably finds the retained memory through the host
 * pointer value (the error text quoted in reuse_before_alloc mentions a pointer
 * value), which would mean reuse() only works when its calloc returns the same
 * address - confirm against the offload documentation.
 */
void retain() {
    int n = 10;
    int i;
    int *p = calloc(n, sizeof *p);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p == NULL) {
        fprintf(stderr, "retain: calloc failed\n");
        return;
    }
    init_array(p, n, 0);

#pragma offload target(mic) in(p:length(n) RETAIN)
    {
        for(i = 0; i < n; i++) {
            printf("retain: the p[%d] id %d\n", i, p[i]);
        }
    }
    free(p);
}

/*
 * Offloads a 10-element array with alloc_if(0) (REUSE), using the coprocessor
 * memory that a preceding retain() call kept alive instead of allocating anew.
 *
 * Returns without offloading if the host allocation fails.
 */
void reuse() {
    int n = 10;
    int i;
    int *p = calloc(n, sizeof *p);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p == NULL) {
        fprintf(stderr, "reuse: calloc failed\n");
        return;
    }
    init_array(p, n, 0);

    // No RETAIN here, so the default free_if(1) applies and the coprocessor
    // memory is released once this offload completes.
#pragma offload target(mic) in(p:length(n) REUSE)
    {
        for(i = 0; i < n; i++) {
            printf("reuse: the p[%d] id %d\n", i, p[i]);
        }
    }
    free(p);
}

/*
 * Like reuse(), but transfers fewer elements than retain() kept on the
 * coprocessor. Author's observation: the reused length may be smaller than the
 * retained memory but must not be larger.
 *
 * Returns without offloading if the host allocation fails.
 */
void reuse2() {
    // Author's observation: n = 11 (more than the 10 retained) reports an error.
    int n = 9;
    int i;
    int *p = calloc(n, sizeof *p);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p == NULL) {
        fprintf(stderr, "reuse2: calloc failed\n");
        return;
    }
    init_array(p, n, 0);

#pragma offload target(mic) in(p:length(n) REUSE)
    {
        for(i = 0; i < n; i++) {
            printf("reuse: the p[%d] is %d\n", i, p[i]);
        }
    }
    free(p);
}


/* Entry point: retains memory on the coprocessor, then reuses it. */
int main(void) {
    /* Enable to see the error raised when nothing has been retained yet. */
    // reuse_before_alloc();

    /* Default run: keep the memory, then reuse it at the same length. */
    retain();
    reuse();

    /* Alternative run: reuse with a shorter length than was retained. */
    // retain();
    // reuse2();

    return 0;
}

Another observation: when reusing memory, it seems that the two variable names do not need to be the same. See the following code:

/*
 * Offloads a 10-element array named p with free_if(0) (RETAIN) so the memory
 * created on the coprocessor is kept after the offload.
 *
 * Returns without offloading if the host allocation fails.
 */
void retain() {
    int n = 10;
    int i;
    int *p = calloc(n, sizeof *p);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p == NULL) {
        fprintf(stderr, "retain: calloc failed\n");
        return;
    }
    init_array(p, n, 0);

#pragma offload target(mic) in(p:length(n) RETAIN)
    {
        for(i = 0; i < n; i++) {
            printf("retain: the p[%d] id %d\n", i, p[i]);
        }
    }
    free(p);
}

/*
 * Offloads a 10-element array with alloc_if(0) (REUSE) through a pointer named
 * p2, i.e. a different variable name from the one used when memory was retained.
 *
 * Returns without offloading if the host allocation fails.
 *
 * NOTE(review): this presumably works because the retained memory is matched
 * by the host pointer value rather than by variable name, and calloc returns
 * the address that was freed earlier - confirm against the offload
 * documentation before relying on it.
 */
void reuse() {
    int n = 10;
    int i;
    int *p2 = calloc(n, sizeof *p2);

    /* init_array would dereference a null pointer if the allocation failed. */
    if (p2 == NULL) {
        fprintf(stderr, "reuse: calloc failed\n");
        return;
    }
    init_array(p2, n, 0);

#pragma offload target(mic) in(p2:length(n) REUSE)
    {
        for(i = 0; i < n; i++) {
            printf("reuse: the p2[%d] is %d\n", i, p2[i]);
        }
    }
    free(p2);
}

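First execute retain, then execute reuse, and the program still runs normally. A likely explanation (not verified here) is that the retained memory is looked up by the value of the host pointer rather than by the variable's name: the error message shown earlier reports a pointer value, not a name.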

Applying the target Attribute to Multiple Declarations

When multiple variables or functions need to be used on the MIC, a more convenient form of declaration attaches the target(mic) attribute to all of them at once. It is written as follows:

#pragma offload_attribute(push, target(mic))
...
#pragma offload_attribute(pop)

Variables and functions declared between the two #pragma lines can be used on the MIC. To declare shared variables and functions for the shared virtual memory model, use the following form instead:

#pragma offload_attribute(push, _Cilk_shared)
...
#pragma offload_attribute(pop)

Here is an example:

// Everything declared between push and pop receives the target(mic) attribute.
// The headers are included inside the region as well, so their declarations
// are covered together with the two prototypes below.
#pragma offload_attribute(push, target(mic))
#include <stdio.h>
#include <stdlib.h>

// Functions that main() calls under an offload pragma.
void test1();
void test2();
#pragma offload_attribute(pop)

/* Entry point: calls test1() and then test2(), each under its own offload pragma. */
int main(void) {
#pragma offload target(mic)
    test1();

#pragma offload target(mic)
    test2();

    return 0;
}

/* Prints a fixed marker line identifying test1. */
void test1(void) {
    printf("this is test1\n");
}

/* Prints a fixed marker line identifying test2. */
void test2(void) {
    printf("this is test2\n");
}

Added by meir4u on Wed, 02 Mar 2022 11:41:20 +0200