clang 20.0.0git
Macros
sm4intrin.h File Reference

Go to the source code of this file.

Macros

#define _mm_sm4key4_epi32(A, B)    (__m128i) __builtin_ia32_vsm4key4128((__v4su)A, (__v4su)B)
 This intrinsic performs four rounds of SM4 key expansion.
 
#define _mm256_sm4key4_epi32(A, B)    (__m256i) __builtin_ia32_vsm4key4256((__v8su)A, (__v8su)B)
 This intrinsic performs four rounds of SM4 key expansion.
 
#define _mm_sm4rnds4_epi32(A, B)    (__m128i) __builtin_ia32_vsm4rnds4128((__v4su)A, (__v4su)B)
 This intrinsic performs four rounds of SM4 encryption.
 
#define _mm256_sm4rnds4_epi32(A, B)    (__m256i) __builtin_ia32_vsm4rnds4256((__v8su)A, (__v8su)B)
 This intrinsic performs four rounds of SM4 encryption.
 

Macro Definition Documentation

◆ _mm256_sm4key4_epi32

#define _mm256_sm4key4_epi32(A, B)    (__m256i) __builtin_ia32_vsm4key4256((__v8su)A, (__v8su)B)

This intrinsic performs four rounds of SM4 key expansion.

The intrinsic operates on independent 128-bit lanes. The calculated results are stored in dst.

__m256i _mm256_sm4key4_epi32(__m256i __A, __m256i __B)
#define _mm256_sm4key4_epi32(A, B)
This intrinsic performs four rounds of SM4 key expansion.
Definition: sm4intrin.h:138

This intrinsic corresponds to the VSM4KEY4 instruction.

Parameters
__A: A 256-bit vector of [8 x int].
__B: A 256-bit vector of [8 x int].
Returns
A 256-bit vector of [8 x int].
DEFINE ROL32(dword, n) {
count := n % 32
dest := (dword << count) | (dword >> (32-count))
RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
tmp.byte[0] := SBOX_BYTE(dword, 0)
tmp.byte[1] := SBOX_BYTE(dword, 1)
tmp.byte[2] := SBOX_BYTE(dword, 2)
tmp.byte[3] := SBOX_BYTE(dword, 3)
RETURN tmp
}
DEFINE L_KEY(dword) {
RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23)
}
DEFINE T_KEY(dword) {
RETURN L_KEY(lower_t(dword))
}
DEFINE F_KEY(X0, X1, X2, X3, round_key) {
RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 1
P[0] := __B.xmm[i].dword[0]
P[1] := __B.xmm[i].dword[1]
P[2] := __B.xmm[i].dword[2]
P[3] := __B.xmm[i].dword[3]
C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0])
C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1])
C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2])
C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3])
DEST.xmm[i].dword[0] := C[0]
DEST.xmm[i].dword[1] := C[1]
DEST.xmm[i].dword[2] := C[2]
DEST.xmm[i].dword[3] := C[3]
ENDFOR
DEST[MAX:256] := 0

Definition at line 138 of file sm4intrin.h.

◆ _mm256_sm4rnds4_epi32

#define _mm256_sm4rnds4_epi32(A, B)    (__m256i) __builtin_ia32_vsm4rnds4256((__v8su)A, (__v8su)B)

This intrinsic performs four rounds of SM4 encryption.

The intrinsic operates on independent 128-bit lanes. The calculated results are stored in dst.

__m256i _mm256_sm4rnds4_epi32(__m256i __A, __m256i __B)
#define _mm256_sm4rnds4_epi32(A, B)
This intrinsic performs four rounds of SM4 encryption.
Definition: sm4intrin.h:266

This intrinsic corresponds to the VSM4RNDS4 instruction.

Parameters
__A: A 256-bit vector of [8 x int].
__B: A 256-bit vector of [8 x int].
Returns
A 256-bit vector of [8 x int].
DEFINE ROL32(dword, n) {
count := n % 32
dest := (dword << count) | (dword >> (32-count))
RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
tmp.byte[0] := SBOX_BYTE(dword, 0)
tmp.byte[1] := SBOX_BYTE(dword, 1)
tmp.byte[2] := SBOX_BYTE(dword, 2)
tmp.byte[3] := SBOX_BYTE(dword, 3)
RETURN tmp
}
DEFINE L_RND(dword) {
tmp := dword
tmp := tmp ^ ROL32(dword, 2)
tmp := tmp ^ ROL32(dword, 10)
tmp := tmp ^ ROL32(dword, 18)
tmp := tmp ^ ROL32(dword, 24)
RETURN tmp
}
DEFINE T_RND(dword) {
RETURN L_RND(lower_t(dword))
}
DEFINE F_RND(X0, X1, X2, X3, round_key) {
RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 1
P[0] := __B.xmm[i].dword[0]
P[1] := __B.xmm[i].dword[1]
P[2] := __B.xmm[i].dword[2]
P[3] := __B.xmm[i].dword[3]
C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0])
C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1])
C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2])
C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3])
DEST.xmm[i].dword[0] := C[0]
DEST.xmm[i].dword[1] := C[1]
DEST.xmm[i].dword[2] := C[2]
DEST.xmm[i].dword[3] := C[3]
ENDFOR
DEST[MAX:256] := 0

Definition at line 266 of file sm4intrin.h.

◆ _mm_sm4key4_epi32

#define _mm_sm4key4_epi32(A, B)    (__m128i) __builtin_ia32_vsm4key4128((__v4su)A, (__v4su)B)

This intrinsic performs four rounds of SM4 key expansion.

The intrinsic operates on independent 128-bit lanes. The calculated results are stored in dst.

__m128i _mm_sm4key4_epi32(__m128i __A, __m128i __B)
#define _mm_sm4key4_epi32(A, B)
This intrinsic performs four rounds of SM4 key expansion.
Definition: sm4intrin.h:76

This intrinsic corresponds to the VSM4KEY4 instruction.

Parameters
__A: A 128-bit vector of [4 x int].
__B: A 128-bit vector of [4 x int].
Returns
A 128-bit vector of [4 x int].
DEFINE ROL32(dword, n) {
count := n % 32
dest := (dword << count) | (dword >> (32-count))
RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
tmp.byte[0] := SBOX_BYTE(dword, 0)
tmp.byte[1] := SBOX_BYTE(dword, 1)
tmp.byte[2] := SBOX_BYTE(dword, 2)
tmp.byte[3] := SBOX_BYTE(dword, 3)
RETURN tmp
}
DEFINE L_KEY(dword) {
RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23)
}
DEFINE T_KEY(dword) {
RETURN L_KEY(lower_t(dword))
}
DEFINE F_KEY(X0, X1, X2, X3, round_key) {
RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 0
P[0] := __B.xmm[i].dword[0]
P[1] := __B.xmm[i].dword[1]
P[2] := __B.xmm[i].dword[2]
P[3] := __B.xmm[i].dword[3]
C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0])
C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1])
C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2])
C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3])
DEST.xmm[i].dword[0] := C[0]
DEST.xmm[i].dword[1] := C[1]
DEST.xmm[i].dword[2] := C[2]
DEST.xmm[i].dword[3] := C[3]
ENDFOR
DEST[MAX:128] := 0

Definition at line 76 of file sm4intrin.h.

◆ _mm_sm4rnds4_epi32

#define _mm_sm4rnds4_epi32(A, B)    (__m128i) __builtin_ia32_vsm4rnds4128((__v4su)A, (__v4su)B)

This intrinsic performs four rounds of SM4 encryption.

The intrinsic operates on independent 128-bit lanes. The calculated results are stored in dst.

__m128i _mm_sm4rnds4_epi32(__m128i __A, __m128i __B)
#define _mm_sm4rnds4_epi32(A, B)
This intrinsic performs four rounds of SM4 encryption.
Definition: sm4intrin.h:202

This intrinsic corresponds to the VSM4RNDS4 instruction.

Parameters
__A: A 128-bit vector of [4 x int].
__B: A 128-bit vector of [4 x int].
Returns
A 128-bit vector of [4 x int].
DEFINE ROL32(dword, n) {
count := n % 32
dest := (dword << count) | (dword >> (32-count))
RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
tmp.byte[0] := SBOX_BYTE(dword, 0)
tmp.byte[1] := SBOX_BYTE(dword, 1)
tmp.byte[2] := SBOX_BYTE(dword, 2)
tmp.byte[3] := SBOX_BYTE(dword, 3)
RETURN tmp
}
DEFINE L_RND(dword) {
tmp := dword
tmp := tmp ^ ROL32(dword, 2)
tmp := tmp ^ ROL32(dword, 10)
tmp := tmp ^ ROL32(dword, 18)
tmp := tmp ^ ROL32(dword, 24)
RETURN tmp
}
DEFINE T_RND(dword) {
RETURN L_RND(lower_t(dword))
}
DEFINE F_RND(X0, X1, X2, X3, round_key) {
RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 0
P[0] := __B.xmm[i].dword[0]
P[1] := __B.xmm[i].dword[1]
P[2] := __B.xmm[i].dword[2]
P[3] := __B.xmm[i].dword[3]
C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0])
C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1])
C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2])
C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3])
DEST.xmm[i].dword[0] := C[0]
DEST.xmm[i].dword[1] := C[1]
DEST.xmm[i].dword[2] := C[2]
DEST.xmm[i].dword[3] := C[3]
ENDFOR
DEST[MAX:128] := 0

Definition at line 202 of file sm4intrin.h.