use std::arch::x86_64::*; #[no_mangle] #[target_feature(enable = "sse2,bmi1")] pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize { let src0 = src0.chunks_exact(16); let src1 = src1.chunks_exact(16); let mut len = 0; unsafe { for (chunk0, chunk1) in src0.zip(src1) { // load the next chunks into a simd register let xmm_src0 = _mm_loadu_si128(chunk0.as_ptr() as *const __m128i); let xmm_src1 = _mm_loadu_si128(chunk1.as_ptr() as *const __m128i); // element-wise compare of the 8...