m2m模型翻译
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

451 lines
19 KiB

6 months ago
  1. # Code common to build tools
  2. import sys
  3. import warnings
  4. import copy
  5. import textwrap
  6. from numpy.distutils.misc_util import mingw32
  7. #-------------------
  8. # Versioning support
  9. #-------------------
  10. # How to change C_API_VERSION ?
  11. # - increase C_API_VERSION value
  12. # - record the hash for the new C API with the cversions.py script
  13. # and add the hash to cversions.txt
  14. # The hash values are used to remind developers when the C API number was not
  15. # updated - generates a MismatchCAPIWarning warning which is turned into an
  16. # exception for released version.
  17. # Binary compatibility version number. This number is increased whenever the
  18. # C-API is changed such that binary compatibility is broken, i.e. whenever a
  19. # recompile of extension modules is needed.
  20. C_ABI_VERSION = 0x01000009
  21. # Minor API version. This number is increased whenever a change is made to the
  22. # C-API -- whether it breaks binary compatibility or not. Some changes, such
  23. # as adding a function pointer to the end of the function table, can be made
  24. # without breaking binary compatibility. In this case, only the C_API_VERSION
  25. # (*not* C_ABI_VERSION) would be increased. Whenever binary compatibility is
  26. # broken, both C_API_VERSION and C_ABI_VERSION should be increased.
  27. #
  28. # 0x00000008 - 1.7.x
  29. # 0x00000009 - 1.8.x
  30. # 0x00000009 - 1.9.x
  31. # 0x0000000a - 1.10.x
  32. # 0x0000000a - 1.11.x
  33. # 0x0000000a - 1.12.x
  34. # 0x0000000b - 1.13.x
  35. # 0x0000000c - 1.14.x
  36. # 0x0000000c - 1.15.x
  37. # 0x0000000d - 1.16.x
  38. # 0x0000000d - 1.19.x
  39. # 0x0000000e - 1.20.x
  40. # 0x0000000e - 1.21.x
  41. C_API_VERSION = 0x0000000e
  42. class MismatchCAPIWarning(Warning):
  43. pass
  44. def is_released(config):
  45. """Return True if a released version of numpy is detected."""
  46. from distutils.version import LooseVersion
  47. v = config.get_version('../_version.py')
  48. if v is None:
  49. raise ValueError("Could not get version")
  50. pv = LooseVersion(vstring=v).version
  51. if len(pv) > 3:
  52. return False
  53. return True
  54. def get_api_versions(apiversion, codegen_dir):
  55. """
  56. Return current C API checksum and the recorded checksum.
  57. Return current C API checksum and the recorded checksum for the given
  58. version of the C API version.
  59. """
  60. # Compute the hash of the current API as defined in the .txt files in
  61. # code_generators
  62. sys.path.insert(0, codegen_dir)
  63. try:
  64. m = __import__('genapi')
  65. numpy_api = __import__('numpy_api')
  66. curapi_hash = m.fullapi_hash(numpy_api.full_api)
  67. apis_hash = m.get_versions_hash()
  68. finally:
  69. del sys.path[0]
  70. return curapi_hash, apis_hash[apiversion]
  71. def check_api_version(apiversion, codegen_dir):
  72. """Emits a MismatchCAPIWarning if the C API version needs updating."""
  73. curapi_hash, api_hash = get_api_versions(apiversion, codegen_dir)
  74. # If different hash, it means that the api .txt files in
  75. # codegen_dir have been updated without the API version being
  76. # updated. Any modification in those .txt files should be reflected
  77. # in the api and eventually abi versions.
  78. # To compute the checksum of the current API, use numpy/core/cversions.py
  79. if not curapi_hash == api_hash:
  80. msg = ("API mismatch detected, the C API version "
  81. "numbers have to be updated. Current C api version is %d, "
  82. "with checksum %s, but recorded checksum for C API version %d "
  83. "in core/codegen_dir/cversions.txt is %s. If functions were "
  84. "added in the C API, you have to update C_API_VERSION in %s."
  85. )
  86. warnings.warn(msg % (apiversion, curapi_hash, apiversion, api_hash,
  87. __file__),
  88. MismatchCAPIWarning, stacklevel=2)
  89. # Mandatory functions: if not found, fail the build
  90. MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs",
  91. "floor", "ceil", "sqrt", "log10", "log", "exp", "asin",
  92. "acos", "atan", "fmod", 'modf', 'frexp', 'ldexp']
  93. # Standard functions which may not be available and for which we have a
  94. # replacement implementation. Note that some of these are C99 functions.
  95. OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
  96. "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
  97. "copysign", "nextafter", "ftello", "fseeko",
  98. "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate",
  99. "backtrace", "madvise"]
  100. OPTIONAL_HEADERS = [
  101. # sse headers only enabled automatically on amd64/x32 builds
  102. "xmmintrin.h", # SSE
  103. "emmintrin.h", # SSE2
  104. "immintrin.h", # AVX
  105. "features.h", # for glibc version linux
  106. "xlocale.h", # see GH#8367
  107. "dlfcn.h", # dladdr
  108. "sys/mman.h", #madvise
  109. ]
  110. # optional gcc compiler builtins and their call arguments and optional a
  111. # required header and definition name (HAVE_ prepended)
  112. # call arguments are required as the compiler will do strict signature checking
  113. OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
  114. ("__builtin_isinf", '5.'),
  115. ("__builtin_isfinite", '5.'),
  116. ("__builtin_bswap32", '5u'),
  117. ("__builtin_bswap64", '5u'),
  118. ("__builtin_expect", '5, 0'),
  119. ("__builtin_mul_overflow", '5, 5, (int*)5'),
  120. # MMX only needed for icc, but some clangs don't have it
  121. ("_m_from_int64", '0', "emmintrin.h"),
  122. ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
  123. ("_mm_prefetch", '(float*)0, _MM_HINT_NTA',
  124. "xmmintrin.h"), # SSE
  125. ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
  126. ("__builtin_prefetch", "(float*)0, 0, 3"),
  127. # check that the linker can handle avx
  128. ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"',
  129. "stdio.h", "LINK_AVX"),
  130. ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"',
  131. "stdio.h", "LINK_AVX2"),
  132. ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"',
  133. "stdio.h", "LINK_AVX512F"),
  134. ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\
  135. "vmovdqu8 %xmm0, %xmm1\\n"\
  136. "vpbroadcastmb2q %k0, %xmm0\\n"',
  137. "stdio.h", "LINK_AVX512_SKX"),
  138. ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"),
  139. ]
  140. # function attributes
  141. # tested via "int %s %s(void *);" % (attribute, name)
  142. # function name will be converted to HAVE_<upper-case-name> preprocessor macro
  143. OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
  144. 'attribute_optimize_unroll_loops'),
  145. ('__attribute__((optimize("O3")))',
  146. 'attribute_optimize_opt_3'),
  147. ('__attribute__((nonnull (1)))',
  148. 'attribute_nonnull'),
  149. ('__attribute__((target ("avx")))',
  150. 'attribute_target_avx'),
  151. ('__attribute__((target ("avx2")))',
  152. 'attribute_target_avx2'),
  153. ('__attribute__((target ("avx512f")))',
  154. 'attribute_target_avx512f'),
  155. ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
  156. 'attribute_target_avx512_skx'),
  157. ]
  158. # function attributes with intrinsics
  159. # To ensure your compiler can compile avx intrinsics with just the attributes
  160. # gcc 4.8.4 support attributes but not with intrisics
  161. # tested via "#include<%s> int %s %s(void *){code; return 0;};" % (header, attribute, name, code)
  162. # function name will be converted to HAVE_<upper-case-name> preprocessor macro
  163. # The _mm512_castps_si512 instruction is specific check for AVX-512F support
  164. # in gcc-4.9 which is missing a subset of intrinsics. See
  165. # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
  166. OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
  167. 'attribute_target_avx2_with_intrinsics',
  168. '__m256 temp = _mm256_set1_ps(1.0); temp = \
  169. _mm256_fmadd_ps(temp, temp, temp)',
  170. 'immintrin.h'),
  171. ('__attribute__((target("avx512f")))',
  172. 'attribute_target_avx512f_with_intrinsics',
  173. '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
  174. 'immintrin.h'),
  175. ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
  176. 'attribute_target_avx512_skx_with_intrinsics',
  177. '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
  178. __m512i unused_temp = \
  179. _mm512_castps_si512(_mm512_set1_ps(1.0));\
  180. _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
  181. 'immintrin.h'),
  182. ]
  183. # variable attributes tested via "int %s a" % attribute
  184. OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"]
  185. # Subset of OPTIONAL_STDFUNCS which may already have HAVE_* defined by Python.h
  186. OPTIONAL_STDFUNCS_MAYBE = [
  187. "expm1", "log1p", "acosh", "atanh", "asinh", "hypot", "copysign",
  188. "ftello", "fseeko"
  189. ]
  190. # C99 functions: float and long double versions
  191. C99_FUNCS = [
  192. "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", "ceil",
  193. "rint", "trunc", "sqrt", "log10", "log", "log1p", "exp", "expm1",
  194. "asin", "acos", "atan", "asinh", "acosh", "atanh", "hypot", "atan2",
  195. "pow", "fmod", "modf", 'frexp', 'ldexp', "exp2", "log2", "copysign",
  196. "nextafter", "cbrt"
  197. ]
  198. C99_FUNCS_SINGLE = [f + 'f' for f in C99_FUNCS]
  199. C99_FUNCS_EXTENDED = [f + 'l' for f in C99_FUNCS]
  200. C99_COMPLEX_TYPES = [
  201. 'complex double', 'complex float', 'complex long double'
  202. ]
  203. C99_COMPLEX_FUNCS = [
  204. "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan",
  205. "catanh", "ccos", "ccosh", "cexp", "cimag", "clog", "conj", "cpow",
  206. "cproj", "creal", "csin", "csinh", "csqrt", "ctan", "ctanh"
  207. ]
  208. def fname2def(name):
  209. return "HAVE_%s" % name.upper()
  210. def sym2def(symbol):
  211. define = symbol.replace(' ', '')
  212. return define.upper()
  213. def type2def(symbol):
  214. define = symbol.replace(' ', '_')
  215. return define.upper()
  216. # Code to detect long double representation taken from MPFR m4 macro
  217. def check_long_double_representation(cmd):
  218. cmd._check_compiler()
  219. body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}
  220. # Disable whole program optimization (the default on vs2015, with python 3.5+)
  221. # which generates intermediary object files and prevents checking the
  222. # float representation.
  223. if sys.platform == "win32" and not mingw32():
  224. try:
  225. cmd.compiler.compile_options.remove("/GL")
  226. except (AttributeError, ValueError):
  227. pass
  228. # Disable multi-file interprocedural optimization in the Intel compiler on Linux
  229. # which generates intermediary object files and prevents checking the
  230. # float representation.
  231. elif (sys.platform != "win32"
  232. and cmd.compiler.compiler_type.startswith('intel')
  233. and '-ipo' in cmd.compiler.cc_exe):
  234. newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '')
  235. cmd.compiler.set_executables(
  236. compiler=newcompiler,
  237. compiler_so=newcompiler,
  238. compiler_cxx=newcompiler,
  239. linker_exe=newcompiler,
  240. linker_so=newcompiler + ' -shared'
  241. )
  242. # We need to use _compile because we need the object filename
  243. src, obj = cmd._compile(body, None, None, 'c')
  244. try:
  245. ltype = long_double_representation(pyod(obj))
  246. return ltype
  247. except ValueError:
  248. # try linking to support CC="gcc -flto" or icc -ipo
  249. # struct needs to be volatile so it isn't optimized away
  250. # additionally "clang -flto" requires the foo struct to be used
  251. body = body.replace('struct', 'volatile struct')
  252. body += "int main(void) { return foo.before[0]; }\n"
  253. src, obj = cmd._compile(body, None, None, 'c')
  254. cmd.temp_files.append("_configtest")
  255. cmd.compiler.link_executable([obj], "_configtest")
  256. ltype = long_double_representation(pyod("_configtest"))
  257. return ltype
  258. finally:
  259. cmd._clean()
  260. LONG_DOUBLE_REPRESENTATION_SRC = r"""
  261. /* "before" is 16 bytes to ensure there's no padding between it and "x".
  262. * We're not expecting any "long double" bigger than 16 bytes or with
  263. * alignment requirements stricter than 16 bytes. */
  264. typedef %(type)s test_type;
  265. struct {
  266. char before[16];
  267. test_type x;
  268. char after[8];
  269. } foo = {
  270. { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
  271. '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
  272. -123456789.0,
  273. { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
  274. };
  275. """
  276. def pyod(filename):
  277. """Python implementation of the od UNIX utility (od -b, more exactly).
  278. Parameters
  279. ----------
  280. filename : str
  281. name of the file to get the dump from.
  282. Returns
  283. -------
  284. out : seq
  285. list of lines of od output
  286. Notes
  287. -----
  288. We only implement enough to get the necessary information for long double
  289. representation, this is not intended as a compatible replacement for od.
  290. """
  291. out = []
  292. with open(filename, 'rb') as fid:
  293. yo2 = [oct(o)[2:] for o in fid.read()]
  294. for i in range(0, len(yo2), 16):
  295. line = ['%07d' % int(oct(i)[2:])]
  296. line.extend(['%03d' % int(c) for c in yo2[i:i+16]])
  297. out.append(" ".join(line))
  298. return out
  299. _BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000',
  300. '001', '043', '105', '147', '211', '253', '315', '357']
  301. _AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020']
  302. _IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000']
  303. _IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1]
  304. _INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353',
  305. '031', '300', '000', '000']
  306. _INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353',
  307. '031', '300', '000', '000', '000', '000', '000', '000']
  308. _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
  309. '242', '240', '000', '000', '000', '000']
  310. _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
  311. '000', '000', '000', '000', '000', '000', '000', '000']
  312. _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
  313. _IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
  314. ['000'] * 8)
  315. _IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
  316. ['000'] * 8)
  317. def long_double_representation(lines):
  318. """Given a binary dump as given by GNU od -b, look for long double
  319. representation."""
  320. # Read contains a list of 32 items, each item is a byte (in octal
  321. # representation, as a string). We 'slide' over the output until read is of
  322. # the form before_seq + content + after_sequence, where content is the long double
  323. # representation:
  324. # - content is 12 bytes: 80 bits Intel representation
  325. # - content is 16 bytes: 80 bits Intel representation (64 bits) or quad precision
  326. # - content is 8 bytes: same as double (not implemented yet)
  327. read = [''] * 32
  328. saw = None
  329. for line in lines:
  330. # we skip the first word, as od -b output an index at the beginning of
  331. # each line
  332. for w in line.split()[1:]:
  333. read.pop(0)
  334. read.append(w)
  335. # If the end of read is equal to the after_sequence, read contains
  336. # the long double
  337. if read[-8:] == _AFTER_SEQ:
  338. saw = copy.copy(read)
  339. # if the content was 12 bytes, we only have 32 - 8 - 12 = 12
  340. # "before" bytes. In other words the first 4 "before" bytes went
  341. # past the sliding window.
  342. if read[:12] == _BEFORE_SEQ[4:]:
  343. if read[12:-8] == _INTEL_EXTENDED_12B:
  344. return 'INTEL_EXTENDED_12_BYTES_LE'
  345. if read[12:-8] == _MOTOROLA_EXTENDED_12B:
  346. return 'MOTOROLA_EXTENDED_12_BYTES_BE'
  347. # if the content was 16 bytes, we are left with 32-8-16 = 16
  348. # "before" bytes, so 8 went past the sliding window.
  349. elif read[:8] == _BEFORE_SEQ[8:]:
  350. if read[8:-8] == _INTEL_EXTENDED_16B:
  351. return 'INTEL_EXTENDED_16_BYTES_LE'
  352. elif read[8:-8] == _IEEE_QUAD_PREC_BE:
  353. return 'IEEE_QUAD_BE'
  354. elif read[8:-8] == _IEEE_QUAD_PREC_LE:
  355. return 'IEEE_QUAD_LE'
  356. elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
  357. return 'IBM_DOUBLE_DOUBLE_LE'
  358. elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
  359. return 'IBM_DOUBLE_DOUBLE_BE'
  360. # if the content was 8 bytes, left with 32-8-8 = 16 bytes
  361. elif read[:16] == _BEFORE_SEQ:
  362. if read[16:-8] == _IEEE_DOUBLE_LE:
  363. return 'IEEE_DOUBLE_LE'
  364. elif read[16:-8] == _IEEE_DOUBLE_BE:
  365. return 'IEEE_DOUBLE_BE'
  366. if saw is not None:
  367. raise ValueError("Unrecognized format (%s)" % saw)
  368. else:
  369. # We never detected the after_sequence
  370. raise ValueError("Could not lock sequences (%s)" % saw)
  371. def check_for_right_shift_internal_compiler_error(cmd):
  372. """
  373. On our arm CI, this fails with an internal compilation error
  374. The failure looks like the following, and can be reproduced on ARM64 GCC 5.4:
  375. <source>: In function 'right_shift':
  376. <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349
  377. ip1[i] = ip1[i] >> in2;
  378. ^
  379. Please submit a full bug report,
  380. with preprocessed source if appropriate.
  381. See <http://gcc.gnu.org/bugs.html> for instructions.
  382. Compiler returned: 1
  383. This function returns True if this compiler bug is present, and we need to
  384. turn off optimization for the function
  385. """
  386. cmd._check_compiler()
  387. has_optimize = cmd.try_compile(textwrap.dedent("""\
  388. __attribute__((optimize("O3"))) void right_shift() {}
  389. """), None, None)
  390. if not has_optimize:
  391. return False
  392. no_err = cmd.try_compile(textwrap.dedent("""\
  393. typedef long the_type; /* fails also for unsigned and long long */
  394. __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) {
  395. for (int i = 0; i < n; i++) {
  396. if (in2 < (the_type)sizeof(the_type) * 8) {
  397. ip1[i] = ip1[i] >> in2;
  398. }
  399. }
  400. }
  401. """), None, None)
  402. return not no_err