| // Copyright 2022 The Abseil Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_ |
| #define ABSL_BASE_INTERNAL_PREFETCH_H_ |
| |
| #include "absl/base/config.h" |
| |
| #ifdef __SSE__ |
| #include <xmmintrin.h> |
| #endif |
| |
| #if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) |
| #include <intrin.h> |
| #pragma intrinsic(_mm_prefetch) |
| #endif |
| |
| // Compatibility wrappers around __builtin_prefetch, to prefetch data |
| // for read if supported by the toolchain. |
| |
| // Move data into the cache before it is read, or "prefetch" it. |
| // |
| // The value of `addr` is the address of the memory to prefetch. If |
| // the target and compiler support it, data prefetch instructions are |
| // generated. If the prefetch is done some time before the memory is |
| // read, it may be in the cache by the time the read occurs. |
| // |
| // The function names specify the temporal locality heuristic applied, |
| // using the names of Intel prefetch instructions: |
| // |
//   T0  - high degree of temporal locality; data should be left in as
//         many levels of the cache as possible
| // T1 - moderate degree of temporal locality |
| // T2 - low degree of temporal locality |
| // Nta - no temporal locality, data need not be left in the cache |
| // after the read |
| // |
| // Incorrect or gratuitous use of these functions can degrade |
| // performance, so use them only when representative benchmarks show |
| // an improvement. |
| // |
| // Example usage: |
| // |
| // absl::base_internal::PrefetchT0(addr); |
| // |
| // Currently, the different prefetch calls behave on some Intel |
| // architectures as follows: |
| // |
//                   SNB..SKL   SKX
//   PrefetchT0()    L1/L2/L3   L1/L2
//   PrefetchT1()    L2/L3      L2
//   PrefetchT2()    L2/L3      L2
//   PrefetchNta()   L1/--/L3   L1*
| // |
| // * On SKX PrefetchNta() will bring the line into L1 but will evict |
| // from L3 cache. This might result in surprising behavior. |
| // |
| // SNB = Sandy Bridge, SKL = Skylake, SKX = Skylake Xeon. |
| // |
| namespace absl { |
| ABSL_NAMESPACE_BEGIN |
| namespace base_internal { |
| |
// Read-prefetch entry points, one per temporal-locality hint. `addr` is the
// address of the memory to prefetch. The inline definitions are chosen below
// according to what the toolchain supports.

// Prefetch with the T0 hint: high degree of temporal locality.
void PrefetchT0(const void* addr);

// Prefetch with the T1 hint: moderate degree of temporal locality.
void PrefetchT1(const void* addr);

// Prefetch with the T2 hint: low degree of temporal locality.
void PrefetchT2(const void* addr);

// Prefetch with the NTA hint: no temporal locality.
void PrefetchNta(const void* addr);
| |
| // Implementation details follow. |
| |
| #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) |
| |
| #define ABSL_INTERNAL_HAVE_PREFETCH 1 |
| |
| // See __builtin_prefetch: |
| // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. |
| // |
| // These functions speculatively load for read only. This is |
| // safe for all currently supported platforms. However, prefetch for |
| // store may have problems depending on the target platform. |
| // |
inline void PrefetchT0(const void* addr) {
  // Read prefetch (rw = 0) with the highest locality level (3); on Intel
  // this is emitted as `prefetcht0`.
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/3);
}
inline void PrefetchT1(const void* addr) {
  // Read prefetch (rw = 0) with locality level 2; on Intel this is emitted
  // as `prefetcht1`.
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/2);
}
inline void PrefetchT2(const void* addr) {
  // Read prefetch (rw = 0) with locality level 1; on Intel this is emitted
  // as `prefetcht2`.
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/1);
}
inline void PrefetchNta(const void* addr) {
  // Read prefetch (rw = 0) with locality level 0, i.e. no temporal locality.
  // Note: this uses prefetchnta on Intel.
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/0);
}
| |
| #elif defined(ABSL_INTERNAL_HAVE_SSE) |
| |
| #define ABSL_INTERNAL_HAVE_PREFETCH 1 |
| |
inline void PrefetchT0(const void* addr) {
  // SSE path: issue the prefetch through the _mm_prefetch intrinsic with the
  // T0 hint. Going from `const void*` to `const char*` only needs a
  // static_cast.
  _mm_prefetch(static_cast<const char*>(addr), _MM_HINT_T0);
}
inline void PrefetchT1(const void* addr) {
  // SSE path: issue the prefetch through the _mm_prefetch intrinsic with the
  // T1 hint. Going from `const void*` to `const char*` only needs a
  // static_cast.
  _mm_prefetch(static_cast<const char*>(addr), _MM_HINT_T1);
}
inline void PrefetchT2(const void* addr) {
  // SSE path: issue the prefetch through the _mm_prefetch intrinsic with the
  // T2 hint. Going from `const void*` to `const char*` only needs a
  // static_cast.
  _mm_prefetch(static_cast<const char*>(addr), _MM_HINT_T2);
}
inline void PrefetchNta(const void* addr) {
  // SSE path: issue the prefetch through the _mm_prefetch intrinsic with the
  // NTA (non-temporal) hint. Going from `const void*` to `const char*` only
  // needs a static_cast.
  _mm_prefetch(static_cast<const char*>(addr), _MM_HINT_NTA);
}
| |
| #else |
// Fallback for toolchains with neither __builtin_prefetch nor SSE:
// deliberately does nothing.
inline void PrefetchT0(const void* /*addr*/) {}
// Fallback for toolchains with neither __builtin_prefetch nor SSE:
// deliberately does nothing.
inline void PrefetchT1(const void* /*addr*/) {}
// Fallback for toolchains with neither __builtin_prefetch nor SSE:
// deliberately does nothing.
inline void PrefetchT2(const void* /*addr*/) {}
// Fallback for toolchains with neither __builtin_prefetch nor SSE:
// deliberately does nothing.
inline void PrefetchNta(const void* /*addr*/) {}
| #endif |
| |
| } // namespace base_internal |
| ABSL_NAMESPACE_END |
| } // namespace absl |
| |
| #endif // ABSL_BASE_INTERNAL_PREFETCH_H_ |