

您可以使用 Microsoft C++ 編譯器選項 /Qpar-report 和 /Qvec-report,將自動平行處理和自動向量化設定為輸出其活動的原因碼和參考訊息。 本文說明這些原因代碼和訊息。




資訊訊息 描述
5001 迴圈向量化。
5002 迴圈因為原因 'description' 而未向量化。
5011 迴圈平行化。
5012 迴圈因為原因 'description' 而未平行化。
5021 無法將迴圈與 pragma 建立關聯。


5xx 原因代碼

5xx 原因代碼同時適用於平行化程式和向量化程式。

原因碼 說明
500 涵蓋數個案例的一般訊息:例如,迴圈包含多個結束點,或迴圈標頭不是藉由遞增歸納變數而結束。
501 歸納變數不是區域變數;或上限不是迴圈不變。
502 歸納變數用其他的方式逐步執行,並非簡單的 +1。
503 迴圈包含了例外狀況處理或 switch 陳述式。
504 迴圈主體可能會擲回例外狀況,而需要解構 C++ 物件。
505 外部迴圈具有預先遞增的歸納變數。 結束分析。
// Sample for reason code 500: a loop whose control flow and induction
// variable handling prevent vectorization, followed by the resolved form.
void code_500(int *A)
{
    // Code 500 is emitted if the loop has non-vectorizable flow.
    // This can include "if", "break", "continue", the conditional
    // operator "?", or function calls.
    // It also encompasses correct definition and use of the induction
    // variable "i", in that the increment "++i" or "i++" must be the last
    // statement in the loop.

    int i = 0;
    while (i<1000)
    {
        if (i == 4)
        {
            break;
        }

        // The increment is deliberately NOT the last statement in the loop.
        ++i;

        A[i] = A[i] + 1;
    }

    // To resolve code 500, use a 'for' loop with single increment of
    // induction variable.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

int bound();

// Sample for reason code 501: the loop's upper bound is a function call the
// compiler cannot prove loop invariant; the second loop shows the fix.
void code_501_example1(int *A)
{
    // Code 501 is emitted if the compiler cannot discern the
    // induction variable of this loop. In this case, when it checks
    // the upper bound of 'i', the compiler cannot prove that the
    // function call "bound()" returns the same value each time.
    // Also, the compiler cannot prove that the call to "bound()"
    // does not modify the values of array A.

    for (int i=0; i<bound(); ++i)
    {
        A[i] = A[i] + 1;
    }

    // To resolve code 501, ensure that the induction variable is
    // a local variable, and ensure that the upper bound is a
    // provably loop invariant value.

    for (int i=0, imax = bound(); i<imax; ++i)
    {
        A[i] = A[i] + 1;
    }
}

int i;

// Sample for reason code 501: the first loop uses the global 'i' as its
// induction variable; the second loop shows the fix with a local.
void code_501_example2(int *A)
{
    // Code 501 is emitted if the compiler cannot discern the
    // induction variable of this loop. In this case, 'i' is
    // a global.

    for (i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }

    // To resolve code 501, ensure that the induction variable is
    // a local variable, and ensure that the upper bound is a
    // provably loop invariant value.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Sample for reason code 502: the induction variable is stepped in more
// than one place; the second loop shows the resolved form.
void code_502(int *A)
{
    // Code 502 is emitted if the compiler cannot discern
    // the induction variable of the loop. In this case,
    // there are three increments to "i", one of which
    // is conditional.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
        ++i;

        if (i < 100)
        {
            ++i;
        }
    }

    // To resolve code 502, ensure that there is just one
    // increment of the induction variable, placed in the usual
    // spot in the "for" loop.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Sample for reason code 503: a loop body containing a switch statement.
void code_503(int *A, int x)
{
    // Code 503 is emitted if there are inadmissible
    // operations in the loop - for example, exception handling and
    // switch statements.

    for (int i = 0; i<1000; ++i)
    {
        switch (x)
        {
        // The cases fall through into each other.
        case 1: A[i] = A[i] + 1;
        case 2: A[i] = A[i] + 2;
        case 3: A[i] = A[i] + 3;
            break;
        }
    }

    // To resolve code 503, try to remove as many switch statements
    // and exception handling constructs as possible.
}

// compile with /EHsc

int code_504_helper();

// Class with a user-declared constructor and destructor, so a local
// instance needs to be destroyed if an exception propagates.
class C504
{
public:
    C504();
    ~C504();
};

// Sample for reason code 504: a C++ object with a destructor is created
// inside the loop body.
void code_504(int *A)
{
    // Code 504 is emitted if a C++ object was created and
    // that object requires EH unwind tracking information under
    // /EHs or /EHsc.

    for(int i = 0; i < 1000; ++i)
    {
        C504 c;
        A[i] = code_504_helper();
    }
}


// Sample for reason code 505: an outer loop that counts down using
// "i--" as its loop condition.
void code_505(int *A)
{
    // Code 505 is emitted on outer loops with pre-incremented
    // induction variables. The vectorizer/parallelizer analysis
    // package doesn't support these loops, and they are
    // intentionally not converted to post-increment loops to
    // prevent a performance degradation.

    // To parallelize an outer loop that causes code 505, change
    // it to a post-incremented loop.

    for (int i=100; i--; )
        for (int j=0; j<100; j++) { // this loop is still vectorized
            A[j] = A[j] + 1;
        }
}

10xx 原因代碼

10xx 原因代碼會套用至平行化程式。

原因碼 說明
1000 編譯器在迴圈主體中偵測到資料的相依性。
1001 編譯器在迴圈主體中偵測到純量變數的存放區,且該純量有在迴圈外的用法。
1002 編譯器嘗試平行處理已具有內部平行化迴圈的迴圈。
1003 迴圈主體包含有可用來讀取或寫入記憶體的內部呼叫。
1004 迴圈主體中有純量縮減。 如果迴圈已向量化,則可能發生純量縮減。
1005 已指定 no_parallel pragma。
1006 此函式包含 OpenMP。 移除此函式中的任何 OpenMP 來解決此問題。
1007 迴圈歸納變數或迴圈界限不是帶正負號的 32 位元數字 (int 或 long)。 藉由變更歸納變數的類型來解決此問題。
1008 編譯器偵測到此迴圈執行的工作不足,不值得進行自動平行處理。
1009 編譯器偵測到嘗試平行處理 「 do-while 」 迴圈。 自動平行化程式只會以 「 for 」 迴圈為目標。
1010 編譯器偵測到迴圈針對其條件使用 「not-equals」 ( != )。
int A[1000];
void func();

// Sample for reason code 1000: the loop body reads the previous element
// and calls an external function.
void code_1000()
{
    // Code 1000 is emitted if the compiler detects a
    // data dependence in the loop body.

    // You can resolve this by using the ivdep pragma.
    // CAUTION -- the compiler will trust your
    // assertion that there are no data dependencies
    // in the loop body. If there are, you are generating
    // code that may have race conditions.

#pragma loop(hint_parallel(0))
    //#pragma loop(ivdep) // ivdep will force this through.
    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i-1] + 1;  // data dependence here
        func();             // data dependence here
    }
}

int code_1001()
    // Code 1001 is emitted if the compiler detects
    // a store to a scalar variable in the loop
    // body, and that scalar has a use beyond the loop.

    // Resolve this by rewriting your code so
    // that the scalar is not needed.

    int s = 0;
#pragma loop(hint_parallel(0))
    for (int i=0; i<1000; ++i)
        s = A[i];
    return s;

void code_1002()
    // Code 1002 is emitted when the compiler tries to
    // parallelize a loop that has an inner loop that
    // has already been parallelized.

#pragma loop(hint_parallel(0))
    for (int i=0; i<1000; ++i) // emit code 1002 for this loop
#pragma loop(hint_parallel(0))
        for (int j=0; j<1000; ++j) // this loop gets parallelized
            A[j] = A[j] + 1;

extern "C" void __stosb(unsigned char*, unsigned char, size_t);
void code_1003(unsigned char *dst)
    // Code 1003 is emitted when the loop body contains an intrinsic
    // call that may read or write to memory.

    // This can be resolved by using the ivdep pragma.
    // CAUTION -- the compiler will trust your
    // assertion that there are no data dependencies
    // in the loop body. If there are, you are generating
    // code that may have race conditions.

#pragma loop(hint_parallel(0))
    //#pragma loop(ivdep) // ivdep will force this through.
    for (int i=0; i<1000; ++i)
        __stosb(dst, 'c', 10);
        A[i] = A[i] + 1;

int code_1004()
    // Code 1004 is emitted when there is a scalar reduction
    // in the loop body, which can occur if the loop has been
    // vectorized.

    // You can resolve this by rewriting your code so that it
    // does not have a scalar reduction.

    int s = 0;
#pragma loop(hint_parallel(0))
    for (int i=0; i<1000; ++i)
        s += A[i];
    return s;

void code_1005()
    // Code 1005 is emitted when the
    // no_parallel pragma is specified.

#pragma loop(no_parallel)
    for (int i=0; i<1000; ++i)
        A[i] = A[i] + 1;

#include <omp.h>

// Compile with /openmp
void code_1006()
    // Code 1006 is emitted when this function contains
    // openmp. Resolve this by removing any openmp in this
    // function.

    for (int i=0; i<1000; ++i)
        A[i] = A[i] + 1;

#pragma omp parallel num_threads(4)
        int i = omp_get_thread_num();
        A[i] = A[i] + 1;

void code_1007()
    // Code 1007 is emitted when the loop induction variable
    // or the loop bounds are not signed 32-bit numbers (int
    // or long). Resolve this by changing the type of the
    // induction variable.

#pragma loop(hint_parallel(0))
    for (unsigned int i=0; i<1000; ++i)
        A[i] = A[i] + 1;

void code_1008()
    // Code 1008 is emitted when the compiler detects that
    // this loop does not perform enough work to warrant
    // auto-parallelization.

    // You can resolve this by specifying the hint_parallel
    // pragma. CAUTION -- if the loop does not perform
    // enough work, parallelizing might cause a potentially
    // large performance penalty.

    // #pragma loop(hint_parallel(0)) //  hint_parallel will force this through
    for (int i=0; i<1000; ++i)
        A[i] = A[i] + 1;

void code_1009()
    // Code 1009 is emitted when the compiler tries to parallelize a
    // "do-while" loop. The auto-parallelizer only targets "for" loops.

    int i = 0;
#pragma loop(hint_parallel(0))
        A[i] = A[i] + 1;
    while (++i < 1000);

void code_1010()
    // Code 1010 is emitted when the compiler tries to parallelize a
    // loop with a condition code of "!=".

    // You can resolve this by replacing it with an ordering comparator
    // like "<".
#pragma loop(hint_parallel(0))
    for (int i = 0; i != 1000; ++i)

11xx 原因代碼

11xx 原因碼會套用至向量化工具。

原因碼 說明
1100 迴圈包含控制流程,例如 「 if 」 或 「 ?: 」。
1101 迴圈包含無法向量化的 (可能隱含) 資料類型轉換。
1102 迴圈包含非算術或其他無法向量化的運算。
1103 迴圈主體包括可能會在迴圈內變更大小的移位作業。
1104 迴圈主體包含純量變數。
1105 迴圈包含無法辨識的縮減作業。
1106 外部迴圈未被向量化。
// Sample for reason code 1100: the loop body contains an "if".
void code_1100(int *A, int x)
{
    // Code 1100 is emitted when the compiler detects control flow
    // in the loop - for example, "if", the ternary operator "?", and
    // the like. Resolve this by flattening or removing control
    // flow in the loop body.

    // Not all control flow causes 1100; some is indeed
    // vectorized.

    for (int i=0; i<1000; ++i)
    {
        // straight line code is more amenable to vectorization
        if (x)
        {
            A[i] = A[i] + 1;
        }
    }
}

extern "C" int __readcr0();

// Sample for reason code 1102: the loop body calls the __readcr0 intrinsic.
void code_1102(int *A)
{
    // Code 1102 is emitted when the compiler is unable to vectorize
    // an operation in the loop body. For example, intrinsics and other
    // non-arithmetic, non-logical, and non-memory operations are not
    // vectorizable.

    // Resolve this by removing as many non-vectorizable operations
    // as possible from the loop body.

    for (int i=0; i<1000; ++i)
    {
        A[i] = __readcr0();
    }
}

// Sample for reason code 1103: shifts by a per-iteration amount, followed
// by the resolved form that shifts by a value read once before the loop.
void code_1103(int *A, int *B)
{
    // Code 1103 is emitted when the compiler is unable to vectorize
    // a "shift" operation. In this example, there are two shifts
    // that cannot be vectorized.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] >> B[i]; // not vectorizable

        int x = B[i];
        A[i] = A[i] >> x; // not vectorizable
    }

    // To resolve this, ensure that your shift amounts are loop
    // invariant. If the shift amounts cannot be loop invariant,
    // it may not be possible to vectorize this loop.

    int x = B[0];
    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] >> x; // vectorizable
    }
}

// Sample for reason code 1104: scalar 'x' is written in the loop and
// returned after it.
int code_1104(int *A, int *B)
{
    // When it vectorizes a loop, the compiler must 'expand' scalar
    // variables to a vector size such that they can fit in
    // vector registers. Code 1104 is emitted when the compiler
    // cannot 'expand' such scalars.

    // In this example, we try to 'expand' x to be used in the
    // vectorized loop. However, there is a use of 'x'
    // beyond the loop body, which prohibits this expansion.

    // To resolve this, try to limit scalars to be used only in
    // the loop body and not beyond, and try to keep their types
    // consistent with the loop types.

    int x;
    for (int i=0; i<1000; ++i)
    {
        x = B[i];
        A[i] = A[i] + x;
    }

    return x;
}

// Sample for reason code 1105: a recognized sum reduction followed by a
// reduction pattern the compiler cannot deduce.
int code_1105(int *A)
{
    // The compiler performs an optimization that's known as "reduction"
    // when it operates on each element of an array and computes
    // a resulting scalar value - for example, in this piece of code, which
    // computes the sum of each element in the array:

    int s = 0;
    for (int i=0; i<1000; ++i)
    {
        s += A[i]; // vectorizable
    }

    // The reduction pattern must resemble the loop in the example. The
    // compiler emits code 1105 if it cannot deduce the reduction
    // pattern, as shown in this example:

    for (int i=0; i<1000; ++i)
    {
        s += A[i] + s;  // code 1105
    }

    // Similarly, reductions of "float" or "double" types require
    // that the /fp:fast switch is thrown. Strictly speaking,
    // the reduction optimization that the compiler performs uses
    // "floating point reassociation". Reassociation is only
    // allowed when /fp:fast is thrown.

    return s;
}

// Sample for reason code 1106: a two-level loop nest.
void code_1106(int *A)
{
    // Code 1106 is emitted when the compiler tries to vectorize
    // an outer loop.

    for (int i=0; i<1000; ++i) // this loop is not vectorized
    {
        for (int j=0; j<1000; ++j) // this loop is vectorized
        {
            A[j] = A[j] + 1;
        }
    }
}

12xx 原因代碼

12xx 原因代碼會套用至向量化工具。

原因碼 說明
1200 迴圈包含迴圈攜帶 (loop-carried) 的資料相依性,因而無法向量化。 迴圈的不同反覆項目會彼此干擾,使得向量化迴圈會產生錯誤的答案,而且自動向量化工具本身無法證明沒有這類資料相依性。
1201 陣列基底隨著迴圈持續變更。
1202 結構中的欄位不是 32 或 64 位元寬。
1203 迴圈主體含有對陣列的非連續存取。
1204 編譯器內部資料結構限制命中:太多資料相依性邊緣。
void fn();

// Sample for reason code 1200: the loop body reads the previous element
// and calls an external function.
void code_1200(int *A)
{
    // Code 1200 is emitted when data dependence is prohibiting
    // vectorization. This can only be resolved by rewriting the
    // loop, and considering the marking of loop function calls as
    // __forceinline.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i-1] + 1; // vectorization-prohibiting
        fn();               // vectorization-prohibiting
    }
}

// Sample for reason code 1201: the array base pointer 'A' is advanced
// inside the loop body.
void code_1201(int *A)
{
    // Code 1201 is emitted when an array base changes
    // in the loop body. Resolve this by rewriting your
    // code so that varying the array base is not necessary.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
        A++;
    }
}

struct S_1202
{
    short a;
    short b;
} s[1000];

short sA[1000], sB[1000], sC[1000];

// Sample for reason code 1202: the loop accesses 16-bit struct fields.
void code_1202(S_1202 *s)
{
    // Code 1202 is emitted when non-vectorizable struct accesses
    // are present in the loop body. Only struct accesses
    // that are 32 or 64 bits are vectorized.

    for (int i=0; i<1000; ++i)
    {
        s[i].a = s[i].b + 1; // this 16 bit struct access is not vectorizable
        sA[i] += sB[i] * sC[i]; // this ensures we don't emit reason code '1300'
    }
}

// Sample for reason code 1203: the loop reads a fixed element and a
// strided element of the array.
void code_1203(int *A)
{
    // Code 1203 is emitted when non-vectorizable memory references
    // are present in the loop body. Vectorization of some non-contiguous
    // memory access is supported - for example, the gather/scatter pattern.

    for (int i=0; i<1000; ++i)
    {
        A[i] += A[0] + 1;       // constant memory access not vectorized
        A[i] += A[i*2+2] + 2;  // non-contiguous memory access not vectorized
    }
}

// Sample for reason code 1204: a five-deep loop nest whose innermost
// statement reads the array at four different offsets.
void code_1204(int *A)
{
    // Code 1204 is emitted when internal compiler data structures
    // hit a limit on the number of data dependence edges recorded.
    // Resolve this by moving the innermost loop to another function.

    for (int i=0; i<1000; i++)
    {
        for (int j=0; j<1000; j++)
        {
            for (int k=0; k<1000; k++)
            {
                for (int l=0; l<1000; l++)
                {
                    for (int m=0; m<1000; m++)
                    {
                        A[m] = A[m+i] + A[m+j] + A[m+k] + A[m+l];
                    }
                }
            }
        }
    }
}

13xx 原因代碼

13xx 原因代碼會套用至向量化工具。

原因碼 說明
1300 迴圈主體包含很少或沒有計算。
1301 迴圈步幅不是 +1。
1302 迴圈是 「 do-while 」。
1303 迴圈的反覆項目太少,向量化無法帶來效益。
1304 迴圈包含不同大小的指派工作。
1305 沒有足夠的類型資訊。
// Sample for reason code 1300: the loop only copies B into A.
void code_1300(int *A, int *B)
{
    // Code 1300 is emitted when the compiler detects that there is
    // no computation in the loop body.

    for (int i=0; i<1000; ++i)
    {
        A[i] = B[i]; // Do not vectorize, instead emit memcpy
    }
}

// Sample for reason code 1301: the loop advances by 2 per iteration.
void code_1301(int *A)
{
    // Code 1301 is emitted when the stride of a loop is not positive 1.
    // Only loops that have a stride of positive 1 are vectorized;
    // rewriting your loop may be required.

    for (int i=0; i<1000; i += 2)
    {
        A[i] = A[i] + 1;
    }
}

// Sample for reason code 1302: a "do-while" loop.
void code_1302(int *A)
{
    // Code 1302 is emitted for "do-while" loops. Only "while"
    // and "for" loops are vectorized.

    int i = 0;
    do {
        A[i] = A[i] + 1;
    } while (++i < 1000);
}

// Sample for reason code 1303: several very short loops. Returns the sum
// of A[0..3] after the loops have run.
int code_1303(int *A, int *B)
{
    // Code 1303 is emitted when the compiler detects that
    // the number of iterations of the loop is too small to
    // make vectorization profitable.

    // If the loop computation fits perfectly in
    // vector registers - for example, the upper bound is 4, or 8 in
    // this case - then the loop _may_ be vectorized.

    // This loop is not vectorized because there are 5 iterations

    for (int i=0; i<5; ++i)
    {
        A[i] = A[i] + 1;
    }

    // This loop is vectorized

    for (int i=0; i<4; ++i)
    {
        A[i] = A[i] + 1;
    }

    // This loop is not vectorized because runtime pointer checks
    // are required to check that A and B don't overlap. It is not
    // worth it to vectorize this loop.

    for (int i=0; i<4; ++i)
    {
        A[i] = B[i] + 1;
    }

    // This loop is not vectorized because of the scalar reduction.

    int s = 0;
    for (int i=0; i<4; ++i)
    {
        s += A[i];
    }
    return s;
}

// Sample for reason code 1304: one loop updates an int array and a
// short array.
void code_1304(int *A, short *B)
{
    // Code 1304 is emitted when the compiler detects
    // different sized statements in the loop body.
    // In this case, there is an 32-bit statement and a
    // 16-bit statement.

    // In cases like this consider splitting the loop into loops to
    // maximize vector register utilization.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
        B[i] = B[i] + 1;
    }
}

typedef struct S_1305
{
    int a;
    int b;
} S_1305;

// Sample for reason code 1305: the loop body is a whole-struct assignment.
void code_1305( S_1305 *s, S_1305 x)
{
    // Code 1305 is emitted when the compiler can't discern
    // proper vectorizable type information for this loop.
    // This includes non-scalar loop types such as struct
    // assignments, as in this example.

    // Resolve this by ensuring that your loops have statements
    // that operate on integers or floating point types.

    for (int i=0; i<1000; ++i)
    {
        s[i] = x;
    }
}

14xx 原因代碼

當指定與向量化不相容的某些選項時,就會發生 14xx 原因碼。

原因碼 說明
1400 已指定 #pragma loop(no_vector)。
1401 以 x86 或 ARM 為目標時,已指定 /kernel 參數。
1402 以 x86 為目標時,未指定 /arch:SSE2 或更高的參數。
1403 已指定 /favor:ATOM 參數,而且迴圈包含雙精度浮點數上的作業。
1404 已指定 /O1 或 /Os 參數。
1405 停用向量化可協助動態初始設定式對靜態初始設定式的最佳化。
// Sample for reason code 1400: the loop is marked with the no_vector pragma.
void code_1400(int *A)
{
    // Code 1400 is emitted when the no_vector pragma
    // is specified.

#pragma loop(no_vector)
    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Compile with /kernel
// Sample for reason code 1401.
void code_1401(int *A)
{
    // Code 1401 is emitted when /kernel is specified.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Compile with /arch:IA32
// Sample for reason code 1402.
void code_1402(int *A)
{
    // Code 1402 is emitted when /arch:IA32 is specified.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Compile with /favor:ATOM
// Sample for reason code 1403.
void code_1403(double *A)
{
    // Code 1403 is emitted when /favor:ATOM is specified, and
    // the loop contains operations on "double" arrays.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

// Compile with /O1 or /Os
// Sample for reason code 1404.
void code_1404(int *A)
{
    // Code 1404 is emitted when compiling for size.

    for (int i=0; i<1000; ++i)
    {
        A[i] = A[i] + 1;
    }
}

15xx 原因代碼

15xx 原因代碼適用於別名。 當記憶體中的某個位置可以透過兩個不同的名稱存取時,就會發生別名。

原因碼 說明
1500 在多維陣列的可能別名。
1501 陣列中結構的可能別名。
1502 可能的別名,且陣列索引不是 n + K 的形式。
1503 可能的別名和陣列索引有多個位移。
1504 可能的別名,可能會要求過多的執行階段檢查。
1505 可能的別名,不過執行階段檢查過於複雜。
// Sample for reason code 1500: two-dimensional array parameters written
// and read in the same loop nest.
void code_1500(int A[100][100], int B[100][100])
{
    // Code 1500 is emitted when runtime pointer
    // disambiguation checks are required, and
    // there are multidimensional array references.

    for (int i=0; i<100; ++i)
    {
        for (int j=0; j<100; ++j)
        {
            A[i][j] = B[i][j] + 1;
        }
    }
}

typedef struct S_1501
{
    int a;
    int b;
} S_1501;

int iA[1000], iB[1000], iC[1000];

// Sample for reason code 1501: the loop accesses fields of two
// array-of-struct pointer parameters.
void code_1501(S_1501 *s1, S_1501 *s2)
{
    // Code 1501 is emitted when runtime pointer
    // disambiguation checks are required, and
    // there are array-of-struct accesses in the
    // loop body.

    for (int i=0; i<100; ++i)
    {
        s1[i].a = s2[i].b + 1;
        iA[i] += iB[i] * iC[i]; // this is to ensure we don't emit reason code '1300'
    }
}

// Sample for reason code 1502: the index into B includes an offset 'x'
// that is incremented on every iteration.
void code_1502(int *A, int *B)
{
    // Code 1502 is emitted when runtime pointer
    // disambiguation checks are required, and
    // an array reference has an offset that varies
    // in the loop.

    int x = 0;
    for (int i=0; i<100; ++i)
    {
        A[i] = B[i + x] + 1;
        ++x;                   // 'x' varies in the loop
    }
}

// Sample for reason code 1503: B is indexed with more than one offset
// inside the same loop.
void code_1503(int *A, int *B, int x, int y)
{
    // Code 1503 is emitted when runtime pointer
    // disambiguation checks are required, and
    // an array reference has multiple offsets.

    for (int i=0; i<100; ++i)
    {
        A[i] = B[i+x] + B[i+y] + 1;   // multiple offsets when addressing 'B': {x, y}
        A[i] = B[i+x] + B[i] + 1;     // multiple offsets when addressing 'B': {x, 0}
        A[i] = B[i+x] + B[i+x] + 1;   // this is vectorized
    }
}

// Sample for reason code 1504: sixteen distinct pointer parameters are
// all written in one loop.
void code_1504(int *A1, int *A2, int *A3, int *A4,
               int *A5, int *A6, int *A7, int *A8,
               int *A9, int *A10, int *A11, int *A12,
               int *A13, int *A14, int *A15, int *A16)
{
    // Code 1504 is emitted when too many runtime
    // pointer disambiguation checks are required.

    for (int i=0; i<100; ++i)
    {
        ++A1[i];
        ++A2[i];
        ++A3[i];
        ++A4[i];
        ++A5[i];
        ++A6[i];
        ++A7[i];
        ++A8[i];
        ++A9[i];
        ++A10[i];
        ++A11[i];
        ++A12[i];
        ++A13[i];
        ++A14[i];
        ++A15[i];
        ++A16[i];
    }
}

// Sample for reason code 1505: a three-deep loop nest whose array indices
// combine all three loop variables. Note that the indices range from -99
// to 198, so callers must pass pointers into the interior of a buffer.
void code_1505(int *A, int *B)
{
    // Code 1505 is emitted when runtime pointer
    // disambiguation checks are required, but are
    // too complex for the compiler to discern.

    for (int i=0; i<100; ++i)
    {
        for (int j=0; j<100; ++j)
        {
            for (int k=0; k<100; ++k)
            {
                A[i+j-k] = B[i-j+k] * 2;
            }
        }
    }
}


C/C++ 編譯器和建置工具錯誤和警告
Visual Studio 2012 中的自動向量化工具 – 概觀
#pragma loop()
/Q 選項 (低階作業)
/Qpar-report (自動平行化程式報告層級)
/Qvec-report (自動向量化報告層級)