
    Pg1             -       i   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d d	lmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z-m.Z. d dl/m0Z1 ejd                  jf                  Z3ejh                  jk                  ddd      Z6d Z7de fdZ8d Z9d Z: e7e3jv                  e3jx                  g       e+       dddejz                  ddfd              Z> e7e3j~                  j                  e3j~                  j                  g       e+       d               ZB e7e3j                  j                  e3j                  j                  g       e+       ddd              ZC e7e3j                         e+       d               ZD e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g       e+dd      d               ZG e7e3j                  j                  e3j                  j                  g       e+       d               ZHd  ZI e7e3j                  j                  e3j                  j                  g       e+       d!               ZK e7e3j                  j                  e3j                  j                  g       e+       d"               ZM e7e3j                  j                        dd#d$       ZP e7e3j                  j                        ej                  dddd%d&       ZR e7e3j                  j                  e3j                  j                  g       e+       ej                  dddd%d'              ZT e7e3j                  j                  e3j                  j                  g       e+       ej                  dddd%d(              ZW e7e3j                  j                  e3j                  j                  g       e+       ddddd%d)              ZY e7e3j                  j                  e3j                  j                  g       e+       d*               Z[ e7e3j                  j                        dd+       Z]d, Z^ e7e3j                  j                        d-        Z` e7e3j                        	 	 	 dd.ed/ed0ed1ee   d2eeb   d3eej                     fd4       Zd e7e3j                        	 dd5ed6ed7ed3eej                     fd8       Zf e7e3j                        d9d9dd:d.ed5ed6ed7ed3eej                     f
d;       Zh e7e3j                        	 	 	 	 	 	 	 dd<ej"                  d=ej"                  d1ee   d>ee   d3eej                     d?ejd@ekdAekdBejfdC       Zl e7e3j                  j                        dDdEdFedGekdHedIej"                  dJebdKejdLefdM       Zn e7e3j                  j                        dDdEdFedGekdHedIej"                  dJebdKejdLefdN       Zp e+        e7e3j                  j                        dO               Zr e7e3j                  j                        dddd dddPdQedJebdRee   dee   dSee   dTekdUejdLefdV       Zt e7e3j                  j                  e3j                  j                  g       e+       dW               Zw e7e3j                  j                        ddX       Zy e7e3j                  j                  e3j                  j                  g       e+       dY               Z{ e7e3j                  j                        ddZ       Z| e7e3j                  j                        d[        Z~ e7e3j                  j                        d\        Z e7e3j                   j                        d]        Z e7e3j                   j                        d^        Z e7e3j                  j                        d_        Z e7e3j                  j                        dddddd`da       Z e7e3j                  j                        ddb       Z e7e3j                  j                        ddc       Z e7e3j                  j                        ddd       Z e7e3j                  j                        de        Z e7e3j                  j                        df        ZdFedgebfdhZdFediedjebfdkZ	 ddledgebdmejfdnZddiedgebdoebfdpZdiedqedrejdgebfdsZ	 dduebdted.edvebfdwZdxebfdyZ e7e3j.                  j                  e3j.                  j0                  g       e+dzd{      ddiedxebd}ejfd~              Z e7e3j4                  j                  e3j6                  j                  g       e+       d.edLefd              Z e7e3j:                  g       e+dzd{      d.efd              ZdedLefdZ e7e3j@                         e+       dFediedejdLefd              Z e7e3jB                         e+       ddFediedejdLefd              Z e7e3jD                         e+       ddFedejdLefd              Z e7e3jF                         e+       ddFedejdLefd              Z e7e3jH                  j                        ddiedejdejfd       Z e7e3jJ                  j                  e3jJ                  j                  g       e+       d.ededLefd              Z e7e3jL                  j                        ddiedejfd       Z e7e3jP                  j                  e3jP                  j                  g       e+ddd      ddddFedejdejdLe	eeef   fd              Z e7e3jT                  j                  e3jT                  j                  g       e+       dddededqedejdLef
d              Z e7e3jX                  j                  e3jX                  j                  g       e+dd|d      dDddiedejdLe	eeef   fd              Z e7e3j\                  j                  e3j\                  j                  g       e+ddd      dDdddiedejdejdLe	eeef   fd              Z e7e3j`                  j                  e3j`                  j                  g       e+       dDdddededqedrejdejdLefd              Z e7e3jd                         e+dd|d      	 	 ddededejdejdLe	eeef   f
d              ZdebdLe	ejejf   fdZ e7e3jj                  j                  e3jj                  j                  g       e+dd      ddiedebdLe	eef   fd              Z e7e3jn                  j                  e3jn                  jp                  g       e+dddd      diedLe	eeeef   fd              Z e7e3jr                  j                        	 	 	 ddiedejdejdeeb   fd       ZdededLe	eek   eek   f   fdZdededjeeb   dLe	eef   fdZd.ededLejfdZ e7e3j|                        dDdddddddiedqedrejdejdtee   dee   dee   dee   dLe	eeeef   fd       Z e7e3j~                  j                  e3j~                  j                  g      dDddddiedqedejdrejdejdee   dLefd       Z e7e3j                         e+dddD      	 	 	 ddFediedejdejdejdLe	eef   fd              Z e7e3j                  j                        d        Z e7e3j                         e+       	 	 dd.edededrejdejdLefd              Zd ZdÄ Z e7e3j                         e+       dĄ               Z e7e3j                         e+       dń               ZdƄ Z e7e3j                         e+dǫ      dȄ               Z e7e3j                         e+dǫ      dɄ               Zdʄ Z e7e3j                         e+       d˄               Z e7e3j                         e+       d̄               Z e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g       e+dǫ      d̈́               Zd΄ Z e7e3j                         e+       dτ               Z e7e3j                         e+       dЄ               Z e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g       e+dǫ      dф               Z e7e3j                         e+       d dFededLefdӄ              Z e7e3j                         e+       dedFedededLef
dք              Z e7e3j                  j                  e3j                  j                  g       e+       d9d9dלd؄              Z e7e3j                  j                  e3j                  j                  g       e+       dd#dل              Z e7e3j                  j                        ddڄ       Z e7e3j                  j                        ddۄ       Z e7e3j                  j                  e3j                  j                  g       e+       dd܄              Z e7e3j                  j                        	 	 dd݄       Z e7e3j                         e+       dބ               Zd߄ ZddZ	 ddej"                  d/ej"                  de
eek   ekf   de
eek   ekf   de
eek   ekf   dejdekdee
eek   ekf      fdZd Z e7e3j                  j                        dej"                  d/ej"                  d1eej"                     deej"                     deej"                     dejdedefd       Z e7e3j                  j                        dej"                  d/ej"                  d1ej"                  deek   deek   deek   dejdeek   dekfd       Zej                  j                  r{ejh                  jk                  ddd      Z e7ejd                  j                   j                  j                        d        Z e7ejd                  j                   j                  j                        d        Zej                  j
                  rMejh                  jk                  ddd      Z e7ejd                  j                  j                        d        Z	ejh                  jk                  ddd      Z
 e7ejd                  j                  j                  j                        d        Z e7ejd                  j                  j                  j                         e7ejd                  j                  j                  j                        d               Z e7ejd                  j                  j"                  j                         e7ejd                  j                  j$                  j                        d               Zejh                  jk                  ddd      Z e7ejd                  j*                  j,                        	 	 	 	 dd       Zd Z e7e3j2                  j                        	 	 	 	 	 dd       Zd Z e7e3j8                  j                        d         Z e7e3j<                         e+       	 	 	 	 	 dd              Z e7e3j@                         e+dǫ      d               Z! e7e3jD                  j                        d        Z# e7e3jH                  j                        d        Z% e7e3jL                  j                        d        Z' e7e3jP                         e+dǫ      d               Z)dedoebfdZ* e7e3jV                         e+dd      d	               Z, e7e3jZ                         e+dǫ      d
               Z. e7e3j^                         e+dd      d               Z0 e7e3jb                         e+dǫ      d               Z2 e7e3jf                  j"                        dd       Z4 e7e3jj                  j                  e3jj                  j                  g       e+       d               Z6 e7e3jn                  j                  e3jn                  j                  g       e+       dddekdekfd              Z7 e7e3jp                  j"                  e3jr                  j"                  g      d        Z: e7e3jv                  j                  g      d        Z< e7e3jz                  j                  e3jz                  j                  g       e+       d9d9dלd              Z> e7e3j~                  j                  e3j                  j                  g      dddd       ZA e7e3j                  j                  g      dddd       ZC e7e3j                  g       e+       d               ZE e7e3j                  g      d        ZG e7e3j                  g      d        ZI e7e3j                  g      d        ZK e7e3j                  g      d        ZM e7e3j                  g      d        ZO e7e3j                  j                        d        ZQ e7e3j                         e+       d                ZS e7e3j                  j                        	 	 	 	 	 	 dd!       ZU e7e3j                  j                        d"        ZWdd#ZX e7e3j                  j                  e3j                  j                  g       e+       ddd$d%              ZZ e7e3j                  j                  e3j                  j                  g      d&        Z] e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g       e+dd      dd'              Za e7e3j                  j                        d(        Zc e7e3j                  j                        d)        Ze e7e3j                  j                        d*        Zg e7e3j                  j                  e3j                  j                  e3j                  j"                  e3j                  j"                  e3j                  j                  e3j                  j                  e3j                  j                  g      d+        Zn e7e3j                  j                  e3j                  j                  e3j                  j"                  e3j                  j"                  g      dd,       Zq e7e3j                  j                  e3j                  j                  g      d-        Ztd. Zu e7e3j                  j"                  e3j                  j                  g      d/        Zw e7e3j                  j"                  e3j                  j                  g      d0        Zy e7e3j                  j                        d1        Z{ e7e3j                  j"                  e3j                  j                  g      d2        Z} e7e3j                  j"                  e3j                  j                  g      d3        Z e7e3j                   j                        d4        Z e7e3j                  j"                         e+       ddLefd5              Z e7e3j                  g       e+       	 dd6              Z e7e3j                  g      	 dd7       Z e7e3j                  g      	 dd8       Z e7e3j                  j                  e3j                  j                  g      dd9       Z e7e3j                  j                        d:        Z e7e3j                  j                        d;        Z e7e3j"                        d<        Z e7e3j&                         e+       d=               Z e7e3j*                        d>        Z e7e3j.                  j                        dd?       Z e7e3j2                  j                        d@        ZddAZ e7e3j8                  j                        dB        ZdC ZdD ZdE ZdF Z	 dd.edGekdHekdIekdJekdKekdLekdMekdNekdOekdPekdQekdRekdSekdTekdUekdVekdWekdXekdYekduebdZejf,d[Zd\ Zd.ededGekdHekdIekdJekdKekdLekdMekdNekdOekdPekdTekdUekdVekdWekdXekdYekduebf&d]Zd^ Z e7e3jL                  j                        d_        Z e7e3jP                  j                        	 	 	 	 dd`       Z e7e3jT                  j                        da        Z e7e3jX                         e+dd      	 	 	 	 ddb              Z e7e3j\                         e+dǫ      dc               Zd.eddefdeZ G df dge      Zd.eddedhekfdiZ e7e3jf                  j                        dj        Z e7e3jj                         e+       dk               Z e7e3jl                         e+dǐdl      dm               Z e7e3jn                  j                  g      dn        Z e7e3jp                  j                        	 	 	 	 	 d	do       Z e7e3jr                  j                        dp        Z e7e3jv                  j                        dq        Z e7e3jz                  j                        d
dr       ZddGekdsekdtejfduZdv Zdw Z e7e3j                  j                        ddx       ZÐddyZĐddzZŐd{ ZƐdd|Zǐdd}Z e7e3j                  j                        d~        Z e7e3j                        d        Z e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g       e+       dd              Z e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g      dd       Z e7e3j                  g      	 	 	 	 ddededededejdejdee   fd       Z e7e3j                  g      	 	 	 	 ddedededee   dejdedejdejdee   fd       Z e7e3j                  g      	 ddededededededededekdekdedejdededee   fd       Z e7e3j                  g      	 	 	 	 ddededededejdee   dee   fd       Z e7e3j                  g      	 	 ddedededededededejdee   dee   fd       Z e7e3j                  g      	 	 	 ddedededee   dejdejdee   fd       Z e7e3j                  g      	 	 ddededededee   dedededededeej   dejdee   fd       Z e7e3j                  g      	 ddedededededededededededekdekdedejdee   f d       Z e7e3j                  g      	 	 	 	 	 d	dedededee   dee   dekdekdedejdejdee   deek   deek   dee   dee   fd       Z e7e3j                  g      	 	 	 ddededededededededekdekdedejdededee   deek   deek   f"d       Z e7e3j                  g      	 	 	 	 	 ddededed1ee   dee   dee   deek   deek   dedekdejdee   dee   dee   deek   fd       Z e7e3j                  g      	 	 	 ddedededed1ee   dee   dee   dej                  dej                  dededededekdejdee   deek   dejf$d       Z e7e3j                  j                  g      	 	 	 	 ddFej"                  d7ej"                  dej"                  dej"                  d1eej"                     deej"                     d3eej                     dejfd       Z e7e3j                  j                  e3j                  j                  g       e+       dd              Z e7e3j                  j                        dd       Z e7e3j                  j                  e3j                  j                  g       e+       ddd#d              Zd Zd Z e7e3j                  j                  e3j                  j                  g      dd       Z e7e3j                  j                  e3j                  j                  g      dd       Z e7e3j                  j                  e3j                  j                  g      	 	 ddedee
ekej                  f      dee
ekej                  f      dee   dee   f
d       Z e7e3j                   j                  e3j                  j                  g      dd       Z  e7e3j                  j                  e3j                  j                  e3j                  j                  e3j                  j                  g      dd       ZdÄ Z e7e3j                  j                        	 	 ddĄ       Z e7e3j                  j                        dń        Z	 e7e3j                  j                        dƄ        Z
dǄ ZdȄ Z e7e3j                  j                  e3j                  j                  g      ddɄ       Z e7e3j                   j                        ddʄ       Z e7e3j"                  j                        dd˄       Z e7e3j&                         e+       	 dd̄              Z e7e3j*                  j                  e3j*                  j                  g       e+dd      dd̈́              Zej.                  Zd΄ Z e7e3j4                  j                        dτ        Z e7e3j6                  j                        dЄ        Z e7e3j8                  j                        dф        Z e7e3j<                  j                        d҄        Z e7e3j>                  j"                  e3j>                  j@                  g       e+       dddӜdԄ              Z! e7e3jD                  g       e+       ddՄ              Z# e7e3jH                  j                  e3jJ                  j                  g      	 	 ddք       Z& e7e3jN                  j                        dׄ        Z' e7e3jP                  j                  e3jP                  j                  g       e+       dd؄              Z( e7ejd                  jf                  jR                        dل        Z) e7ejd                  jf                  jT                        dڄ        Z* e7e3jV                         e+       dddddۜd܄              Z,d݄ Z- e7e3j\                        	 ddބ       Z/ e7e3j`                        	 dd߄       Z1 e7e3jd                        	 dd       Z3 e7e3jh                         e+       dddd              Z5 e7e3jl                         e+       dekdFedLefd              Z7 e7e3jp                        dFefd       Z9 e7e3jt                  j                        	 ddedSee   deek   defd       Z;d Z<d Z= e<e3j|                          e<e3j~                          e<e3j                          e<e3j                          e<e3j                          e<e3j                          e<e3j                          e<e3j                          e<e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                          e=e3j                         d ZS e7e3j                         e+       d               ZT e7e3j                         e+       d9dd              ZU e7e3j                         e+       d9dd              ZV eSe3j                        ZW eSe3j                        ZX eSe3j                        ZYd dl,Zd dlZZd dl[Zd Z\ e\        y(      N)Enum)wraps)ListOptionalSequenceTupleUnion)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)
BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_pytreeatenIMPLMetac                       fd}|S )Nc                 V     t                 fd}t        j                  |        S )Nc                 (    t        t        |        y N)r   r   )opfns    `/var/www/html/suriana-translation/venv/lib/python3.12/site-packages/torch/_meta_registrations.pyregisterz0register_meta.<locals>.wrapper.<locals>.register3   s    
B3    )r   pytree	tree_map_)r.   r0   r-   s   ` r/   wrapperzregister_meta.<locals>.wrapper0   s)     $	4 	2&	r1    )r-   r4   s   ` r/   register_metar6   /   s     Nr1   type_promotionc                     t        j                  |d| i\  }}|D cg c]  }t        ||       }}t        | }t	        |dt
        j                  iS c c}w )Ntype_promotion_kindr7   )utilsr   r   r$   r   r   DEFAULT)r7   args_result_dtypexs        r/   elementwise_metar@   <   ss    
 ..	*OA| ?CC#A|4CDC T"D "	BJJ  Ds   Ac                     t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  i}|j                  | |       S r,   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexs     r/   toRealValueTyperL   P   sE    ekku||L
 E5))r1   c                 l     t        t         g|       t        j                   k(   fd       y )Nc                      d d  S )Nzoutput with shape z# doesn't match the broadcast shape r5   )broadcasted_shape
self_shapes   r/   <lambda>z)check_inplace_broadcast.<locals>.<lambda>]   s    $ZL0STeSfg r1   )tupler#   rB   _check)rP   
args_shaperO   s   ` @r/   check_inplace_broadcastrU   Y   s0    /
HZHI	LLZ'gr1   Fc	                 <   	 t         t        j                        r(t        j                   j	                         dk(  d        t        t        j                        r(t        j                  j	                         dk(  d        t        d  fD              rZt        j                  t        j                               		nFt        j                  t        j                        	fd       nxs t        j                         t        t        j                        sJ t        j                  t        t               fd       t        t              sJ t        j                  dk\  d        t        j                  f|d||	      S )
Nr   c                       yNz:linspace only supports 0-dimensional start and end tensorsr5   r5   r1   r/   rQ   z(meta_linspace_logspace.<locals>.<lambda>q       r1   c                       yrX   r5   r5   r1   r/   rQ   z(meta_linspace_logspace.<locals>.<lambda>v   rY   r1   c              3   <   K   | ]  }t        |t                y wr,   )
isinstancecomplex).0args     r/   	<genexpr>z)meta_linspace_logspace.<locals>.<genexpr>y   s     
C:c7#
Cs   c                      d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r5   )default_complex_dtyperJ   s   r/   rQ   z(meta_linspace_logspace.<locals>.<lambda>   s    56K5LLrsxryz r1   c                      dt              j                   dt               j                   dt              j                   dS )Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepss   r/   rQ   z(meta_linspace_logspace.<locals>.<lambda>   sD     u+r$s),,-RU0D0D/EQH r1   c                       y)Nz$number of steps must be non-negativer5   r5   r1   r/   rQ   z(meta_linspace_logspace.<locals>.<lambda>   rY   r1   meta)rJ   layoutdevice
pin_memoryrequires_grad)r\   rB   r   rS   dimanyr:   r   get_default_dtypeis_complex_dtyperJ   _check_typer   empty)
ri   rh   rj   baserJ   rn   rm   ro   rp   rb   s
   ``` `    @r/   meta_linspace_logspacerx   a   sL    %&IIK1P	
 #u||$GGINP	

 
CsE/B
CC % A A##%!
 =)ELL&&u-z
 2002eU[[))) 
5'"	H
 eW%%%	LL!KL;;	# r1   c                    t        j                  j                  t         j                  k(  fd       t        j                  | j                         dk(  xr j                         dk7   d        | j                  j                        S )Nc                  "    d j                    S )Nz2take(): Expected a long tensor for index, but got rJ   indexs   r/   rQ   zmeta_take.<locals>.<lambda>   s    DU[[MR r1   r   c                       y)Nz*take(): tried to take from an empty tensorr5   r5   r1   r/   rQ   zmeta_take.<locals>.<lambda>   rY   r1   )rB   rS   rJ   long_check_indexnumel	new_emptyshape)selfr}   s    `r/   	meta_taker      sm     
LLuzz!R
 
ZZ\Q55;;=A#56< >>%++&&r1   rq   c                T     j                   }j                   }t        j                  ||k(  d        t        j                   j                        dk(  xr j                        dk(   fd       t	         j
                  j
                        } j                  |      S )Nc                       y)Nz=linalg.cross: inputs must have the same number of dimensions.r5   r5   r1   r/   rQ   zlinalg_cross.<locals>.<lambda>   rY   r1      c                  V    d  dj                          dj                          S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)rq   otherr   s   r/   rQ   zlinalg_cross.<locals>.<lambda>   s6    -cU 399S>"%

3'8: r1   )ndimrB   rS   r   r#   r   r   )r   r   rq   x_dy_d	out_shapes   ```   r/   linalg_crossr      s     ))C
**C	LLs
O 
LL		#!4

31 4	
 "$**ekk:I>>)$$r1   c                 |    t        | d       t        | d       t        j                  | t        j                        S )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrB   
empty_likecontiguous_formatr   s    r/   linalg_matrix_expr      s3     d/04!45D0G0GHHr1   valuesindicesc                 Z   t        j                  | j                  | j                  | j                        }t        j                  | j                  | j                  t         j
                        }| j                         dk7  r%| j                  dk7  rt        || j                         ||fS )Nrn   rJ   r   )	rB   rv   r   rn   rJ   int64r   r   maybe_wrap_dim)r   rq   r   r   s       r/   	cummaxminr      sp    
 [[DKKtzzJFkk$**T[[LGzz|qTYY!^sDII&7?r1   c                 t    t        || j                         t        j                  |       j	                         S r,   )r   r   rB   r   
contiguous)r   rq   s     r/   logcumsumexpr      s,     3		"D!,,..r1   c                    |j                   }t        |      }||z
  }t        t        |            }t        |      D 	cg c]  }	d }
}	|D ]  }d|
|<   	 g g }}|D ]*  }|
|   s|j	                  |       |j	                  |       , ||z   }t        |      }|j                         |d | }|j                  fdd       |||d  z   }|j                  |      }dgt        |j                  |d        z   }|j                  |      }|j                  d      }||d<   |}t        t        |            D ]  }|||      ||dz   <    | j                  |      } t        |      D 	cg c]  }	d }}	d}|dz
  }|dk\  r0|| j                  d      z  |||   <   ||||      z  }|dz  }|dk\  r0t        ||      D ]  }| j                  d||z
  z         |||   <   ! | j                  ||| j                               S c c}	w c c}	w )NFTc                     |    S r,   r5   r?   self_stridess    r/   rQ   z_exec_fft.<locals>.<lambda>   s    <? r1   keyreverser   r      )r   lenlistrangeappendstridesortpermuter   reshaper   
as_stridedstorage_offset)outr   	out_sizesrq   forwardr   signal_ndim
batch_dimsdim_permuter=   is_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr   s                          @r/   	_exec_fftr      sK   99Dc(K#J uT{#K).t5A%55 % $1% b%D !!$KKNLLO	
 ,KD	I;;=L
jy
!CHH*DH9IJ//KLL%E D4JK 899MMM-(EAJ!M!%3s8_ 5#,SV#4!a% 5
++'
(C $Dk*1*K*KQA
q&&1CJJqM&AKN#yQ00	Q q& :t$ G&)jja*n1E&FKN#G>>)[#2D2D2FGGS 6@ +s   	G='	Hc                     | j                   j                  sJ | j                  }| j                  |      }|s|S |d d  }| j	                         |j                  fdd       t        || |||      }|S )Nc                     |    S r,   r5   r   s    r/   rQ   zmeta_fft_c2c.<locals>.<lambda>   s    <? r1   Tr   )rJ   
is_complexr   r   r   r   r   )r   rq   normalizationr   r   outputsorted_dimsr   s          @r/   meta_fft_c2cr     sv     ::    

I^^I&Fa&K;;=L2DAvtYWEFMr1   c                     | j                   j                  sJ t        | j                               }|r|d   }||   dz  dz   }|||<   | j	                  |t        j                  | j                               S )Nr      r   r{   )rJ   is_floating_pointr   r   r   r:   r   )r   rq   r   onesidedoutput_sizeslast_dimlast_dim_halfsizes          r/   meta_fft_r2cr   &  s|     ::''''		$Lr7)(3q8A=!2X>>E==djjI   r1   )	generatorc                B    t        |t        j                  | g            S r,   )r   rB   Size)nr   r   s      r/   meta_randpermr   6  s    S%**aS/22r1   rJ   rm   rn   ro   c                6    t        j                  | ||||      S Nr   rB   rv   )r   rJ   rm   rn   ro   s        r/   meta_randperm_defaultr   ;  s      ;;	vf r1   c                6    t        j                  |||||      S r   r   )highr   rJ   rm   rn   ro   s         r/   meta_randintr   I  s      ;;E&J r1   c                6    t        j                  |||||      S r   r   )lowr   r   rJ   rm   rn   ro   s          r/   meta_randint_lowr   Y  s      ;;E&J r1   c                6    t        j                  | ||||      S r   r   )r   rJ   rm   rn   ro   s        r/   meta_rand_defaultr   j  s      ;;E&J r1   c                     | j                   j                  sJ t        | j                               }|||d   <   | j	                  |t        | j                               S )Nr   r{   )rJ   r   r   r   r   rL   )r   rq   r   lastdimr   s        r/   meta_fft_c2rr   r  sQ     ::    		$L#LR>>,odjj.I>JJr1   c                 J   ddl m}  ||       s#t        j                  |       dk(  rt	        d      t        |t              ra|j                  | |      }| j                         |j                         k7  r.t        j                  j                  || j                                | S )Nr   )free_unbacked_symbolsr   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr   rB   _debug_has_internal_overlapRuntimeErrorr\   r   tor   r&   expand_copydefault)r   srcnon_blockingr   intermediates        r/   
meta_copy_r   {  s     L "$'E,M,Md,SWX,X_
 	
 #vvvdL199;,++--$$\499;?Kr1   c                     t        | j                               }t        | j                               }|| j                         k\  rdn
||   ||   z  }|j	                  |d       |j	                  ||       ||fS Nr   )r   r   r   rq   insert)tensorrq   result_sizesresult_strides
new_strides        r/   inferUnsqueezeGeometryr    sq    &L&--/*NVZZ\)|C/@>RUCV/VJQ#z*''r1   c                     t        || j                         dz         }t        | |      \  }}| j                  ||       | S r   )r   rq   r  as_strided_)r   rq   g_sizes	g_stridess       r/   meta_unsqueeze_r
    s>    
dhhj1n
-C/c:GYWi(Kr1   r   weight_metabias_activation_opt	out_dtypec                 8   t        | j                        }|*|j                  d      |j                  d      k(  sJ d       |j                  d      | j                  d      dz  k(  sJ |j                  d      |d<   t        | j                        dk(  sJ d       d| j                  d      f}|7| j                  t
        j                  k(  r|t
        j                  k(  sJ d       | j                  ||| j                  n|      j                  ||      }|S )	Nr   zoutput size mismatchr   r   r   z*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorr{   )
r   r   r   r   rJ   rB   int8int32r   r   )	r   r  r  r  r  r  r   transposed_stridesr   s	            r/   meta_sparse_structured_linearr    s    $L{{1~1-E/EE-;;q>UZZ^a////{{1~L u{{q N"NN UZZ]+KK5::%)u{{*B	GF	GB__&.ekkI   j12 
 Mr1   mat1	mat1_metamat2c                    t        | j                        dk(  sJ t        |j                        dk(  sJ t        |j                        dk(  sJ | j                  d      |j                  d      dz  k(  sJ | j                  d      |j                  d      g}|7|j                  t        j
                  k(  r|t        j                  k(  sJ d       |j                  |||j                  n|      }|S )Nr   r   r   r  r{   r   r   r   rJ   rB   r  r  r   )r  r  r  r  r   r   s         r/   meta_sparse_structured_mmr    s     tzz?ay1$$$tzz?a99Q<499Q<!++++IIaL$))A,/LJJ%**$ekk)A	GF	GA^^%-djj9  F
 Mr1   r   )alphabetar  c                |   t        | j                        dk(  sJ d       t        |j                        dk(  sJ t        |j                        dk(  sJ t        |j                        dk(  sJ | j                  d      |j                  d      k(  sJ d       |j                  d      |j                  d      dz  k(  sJ |j                  d      |j                  d      g}|7|j                  t        j
                  k(  r|t        j                  k(  sJ d       |j                  |||j                  n|      }|S )Nr   zEonly input broadcasted to columns of mat1 * mat2 product is supportedr   r   r  r{   r  )	r   r  r  r  r  r  r  r   r   s	            r/   meta_sparse_structured_addmmr    s9    	EKKAONOtzz?ay1$$$tzz?a::a=DII	  ONO  99Q<499Q<!++++IIaL$))A,/LJJ%**$ekk)A	GF	GA^^%-djj9  F
 Mr1   compressed_Adense_Br  transpose_resultalg_idsplit_ksplit_k_one_kernelc	                 L   |j                   t        j                  t        j                  t        j                  t        j
                  t        j                  hv sJ d       | j                   |j                   k(  sJ d       t        |j                        dk(  sJ d       | j                   t        j
                  t        j                  fv }	|	rdnd}
|	r|j                         rJ d       |j                  d      }|j                  d	      }| j                         d
z  |
|z  z  }|||j                  d      k(  sJ |I|	r@|t        j                  t        j                  t        j                  t        j                  hv sJ d       |r||fn||f}|j                  ||      S )Nz;_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3zinputs must have the same dtyper   z'_cslt_sparse_mm only supports 2d inputs
   	   z.dense input must be transposed for 8bit dtypesr   r      z\out_dtype is not supported for {compressed_A.dtype} x {dense_B.dtype} -> {out_dtype} matmul!r{   )rJ   rB   float32float16bfloat16r  float8_e4m3fnr   r   is_contiguousr   r   r  r   )r   r!  r  r  r  r"  r#  r$  r%  is_8bit_input_typecompression_factorkr   moutput_shapes                  r/   meta__cslt_sparse_mmr4    s    ==

  E EE  .Q0QQ.w}}"M$MM"%++

E<O<O/PP1q%%'	<;	<' 	QAQA					"(:Q(>?ADIIaL   ##			j j		j .Aq6Aq6L\;;r1   T)include_selfr   rq   r}   sourcereducer5  returnc                L    t        j                  | t         j                        S Nr   )rB   r   r   r   rq   r}   r6  r7  r5  s         r/   meta_index_reducer<  6  s     D0G0GHHr1   c                    | S r,   r5   r;  s         r/   meta_index_reduce_r>  C  s	     Kr1   c                     t        | j                               }| j                         dkD  r|j                         ||<   | j	                  |      S )Nr   )r   r   rq   r   r   )r   rq   r}   result_sizes       r/   meta_index_selectrA  Q  s@     tyy{#KxxzA~ ;;=C>>+&&r1   )lengthsr   offsetsaxisunsafeinitialdatarB  rC  rD  rE  c                     |t        d       fd}| ||j                        S |+|j                  d d |j                  d   dz
  fz   }	 ||	      S t        d      )Nz?segment_reduce(): indices based reduction is not supported yet.c                     t        j                  | j                  dz   d  z   j                  dt         j                        S )Nr   rl   rJ   rn   r   )rB   rv   r   rJ   r   )lengths_shaperD  rG  s    r/   segment_reduce_lengths_tensorz:meta_segment_reduce.<locals>.segment_reduce_lengths_tensork  s>    {{DJJtaxz22**11	
 	
r1   r   r   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr   r   )
rG  r7  rB  r   rC  rD  rE  rF  rL  rK  s
   `    `    r/   meta_segment_reducerN  Z  s|     !M
 	

 ,W]];; cr*gmmB.?!.C-EE,];;
U
VVr1   c                 $    | j                  d      S Nr5   r   r   s    r/   meta_maxrR  ~       >>"r1   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS Nr{   r:   reduction_dimsr   _compute_reduction_shaper   rB   r   r   rq   keepdimr3  s       r/   meta_max_dimr[    R    


tzzC6
2C+D#w?L|$|5::6 r1   c                 $    | j                  d      S rP  rQ  r   s    r/   meta_minr^    rS  r1   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS rU  rV  rY  s       r/   meta_min_dimr`    r\  r1   c                     | j                         rt        | j                        }nt        | t        j
                        \  }}t        j                  | |      S )Nr9   r{   )r   r   rJ   r   r   INT_TO_FLOATrB   r   )r   r>   r=   s      r/   
meta_anglerd    sI    /

;, ? L L
< D55r1   c                     t        j                  || j                         | j                         |j	                  t        j
                  |             S r,   )rB   _resize_output_r   rn   copy_angle)r   r   s     r/   meta_angle_outri    s6    	#tyy{DKK899U[[&''r1   c                      y r,   r5   )vals    r/   assert_asyncrl        
r1   c                      y r,   r5   )rk  
assert_msgs     r/   assert_async_metarp    rm  r1   c                      y r,   r5   )ss    r/   
print_metars    rm  r1   rJ   rm   rn   ro   r   c                 0    t        j                  dd      S )Nr   rl   rn   r   rt  s        r/   make_dep_tokenrw    s     ;;q((r1   c                 h    ddl m} t        | t        t        f      rt        d       || ||       y )Nr   )constrain_range'Constraining SymFloat or Symbool is nyiminmax)r   ry  r\   r   r
   
ValueError)r   r|  r}  ry  s       r/   sym_constrain_ranger    s/     F$7+,BCCDcs+r1   c                 6    t         j                  | ||       |S Nr{  )r&   r  r   r|  r}  	dep_tokens       r/   functional_sym_constrain_ranger    s    Ts4r1   c                 (   ddl m} ||t        j                  |        y t	        | t
        t        f      rt        d      t        |       t        u r5|t        j                  | |k\         |t        j                  | |k         y  || ||       y )Nr   )_constrain_range_for_sizerz  r{  )r   r  rB   _check_is_sizer\   r   r
   r~  rf   intrS   )r   r|  r}  r  s       r/   sym_constrain_range_for_sizer    s     P
{s{T"$7+,BCCDzS?LL%?LL%d5r1   c                 6    t         j                  | ||       |S r  )r&   r  r  s       r/   'functional_sym_constrain_range_for_sizer    s    %%d%=r1   c                     |S r,   r5   )rk  ro  r  s      r/   functional_assert_async_metar    s    r1   f_namec                     | j                         dk\  s
J | d       | j                  d      | j                  d      k(  s.J | d| j                  d       d| j                  d       d       y )Nr   z3: The input tensor must have at least 2 dimensions.r   z5: A must be batches of square matrices, but they are  by 	 matrices)rq   r   )r   r  s     r/   r   r     s    
aF
DEF 			"2&s
FtyyQS}oUYZ^ZcZcdfZgYhhqrs&r1   Anamec                     t        j                   j                  j                  k(   fd       t        j                   j                  j                  k(   fd       t        j                  j	                  d      j	                  d      k(  fd       t        j                  j	                  d       j	                  d      k(   fd       y )Nc                  >    dj                    d j                    dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.rv  r  r   s   r/   rQ   z(linearSolveCheckInputs.<locals>.<lambda>
  s%    H{{m:ahhZy: r1   c                  >    dj                    d j                    dS )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r{   r  s   r/   rQ   z(linearSolveCheckInputs.<locals>.<lambda>  s%    Kzzl/!'')= r1   r   r  c                  R    d j                  d       d j                  d       dS )Nz3A must be batches of square matrices, but they are r  r  r   r  r   r  s   r/   rQ   z(linearSolveCheckInputs.<locals>.<lambda>  s0    FF2J<tAFF2J<yB r1   c                      d d j                  d       d j                  d       dj                  d       dj                  d       
S )NzIncompatible matrix sizes for z: each A matrix is r   r  z but each b matrix is r  r   )r  r  r   s   r/   rQ   z(linearSolveCheckInputs.<locals>.<lambda>"  sR    ,TF 3D$TYYr]O4		"H r1   )rB   rS   rn   rJ   r   )r   r  r  s   ```r/   linearSolveCheckInputsr    s    	LLqxx	
 
LL

agg	
 
LL	r
affRj 	
 
LL	r
diim#	
r1   tallow_low_precision_dtypesc                 J   | j                   t        j                  | j                         xs | j	                         fd       |sYt        j                  t        j
                  t        j                  t        j                  t        j                  fv fd       y y )Nc                       d  S )Nz<: Expected a floating point or complex tensor as input. Got r5   rJ   r  s   r/   rQ   z(checkFloatingOrComplex.<locals>.<lambda>3  s    6(VW\V]^ r1   c                       d  S )Nz*: Low precision dtypes not supported. Got r5   r  s   r/   rQ   z(checkFloatingOrComplex.<locals>.<lambda>8  s    vhHP r1   )	rJ   rB   rS   r   r   rF   rH   rE   rG   )r  r  r  rJ   s    ` @r/   r   r   +  sn    
 GGE	LL	/^ &ekk5<<u}}MMP	
 &r1   arg_namec                 ^    t        j                  | j                         dk\  fd       y )Nr   c                       d  dS )Nz: The input tensor z! must have at least 2 dimensions.r5   )r  r  s   r/   rQ   zcheckIsMatrix.<locals>.<lambda>@  s    6(-hZ7XY r1   )rB   rS   rq   )r  r  r  s    ``r/   checkIsMatrixr  =  s    	LL	1Yr1   Br   c                      t                t               t        j                  r# j	                  d      j	                  d      k(  n" j	                  d      j	                  d      k(   fd       y )Nr  r   c                       drdnd d j                  d       d j                  d       dj                  d       dj                  d       d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r  r?   r   r   re   r   )r  r  r  r   s   r/   rQ   z#checkInputsSolver.<locals>.<lambda>I  s[    hHxX.AaffRj\qvvbzl!AFF2J<qJ r1   )r   r  rB   rS   r   )r  r  r   r  s   ````r/   checkInputsSolverr  D  sY    a !V	LL$(r
affRj affRjAFF2J.F	
r1   resultfn_nameresult_namec                 r     t        j                  j                  j                  k(   fd       y )Nc            	      L      d d dj                    dj                    	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on rv  )r  r   r  r  s   r/   rQ   z!checkSameDevice.<locals>.<lambda>Y  s5    i{;-/dm4nU\\NL r1   )rB   rS   rn   )r  r  r   r  s   ````r/   checkSameDevicer  Q  s&     
LL%	
r1   UPLOc                       j                         }t        j                  t               dk(  xr |dk(  xs |dk(   fd       y )Nr   ULc                      d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r5   )r  s   r/   rQ   zcheckUplo.<locals>.<lambda>d  s    CD6J r1   )upperrB   rS   r   )r  UPLO_uppercases   ` r/   	checkUplor  `  s<    ZZ\N	LLD	QKNc1J^s5JJr1   eigenvalueseigenvectorsr  	compute_vc                 T   t        | d       t        |       t        | j                        }|r/| j	                  |      }|j                  |t        |d             n| j	                  dg      }|j                          | j	                  |t        | j                              }||fS )Nzlinalg.eighF	row_majorr   r{   )
r   r  r   r   r   r  r   poprL   rJ   )r  r  r  r   vecsvalss         r/   meta__linalg_eighr  h  s     a'dOME{{5! ;EU ST{{A3	IIK;;uOAGG$<;=D:r1   c                     t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      S )Nzlinalg.eigvalsr   r{   r   r:   rt   rJ   r   r   r   )r   complex_dtypes     r/   meta__linalg_eigvalsr  {  sc     e-. !!%++. 	..u{{; 
 ??5;;s+=?AAr1   c                 0   t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      }| j                  | j                  |      }||fS )Nz
linalg.eigr   r{   r  )r   r  r   vectorss       r/   meta_linalg_eigr    s     e\* !!%++. 	..u{{; 
 __U[["-]_CFooekko?G7?r1   r   c                 v    | j                   j                  t        j                        j	                  dd      S )Nr   r  r   )mTclonerB   r   	transpose)r   s    r/   cloneBatchedColumnMajorr    s*    66<<e&=&=<>HHRPPr1   r  c                     t        |       S r,   )r  )r   r  r  s      r/   _cholesky_solve_helperr    s     #4((r1   c                      t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d      \  }}t	        |||      S )Nr   c                  $    d j                    dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r   s   r/   rQ   z cholesky_solve.<locals>.<lambda>  s    ?		{J]^ r1   c                  $    d j                    dS )Nz-u should have at least 2 dimensions, but has r  r  r  s   r/   rQ   z cholesky_solve.<locals>.<lambda>  s    ?xGZ[ r1   cholesky_solve)rB   rS   r   !_linalg_broadcast_batch_dims_namer  )r   r  r  self_broadcastedA_broadcasteds   ``   r/   r  r    sh     
LL		Q^ 
LL	![ 'Ha!'#m ""2M5IIr1   c                     | j                         dk(  r%t        j                  | t        j                        S t	        | d       t        |       S )Nr   r   cholesky)r   rB   r   legacy_contiguous_formatr   r  r   r  s     r/   r  r    s@     zz|qE4R4RSSdJ'"4((r1   c                 0    t        | d       t        |       S )Ncholesky_inverse)r   r  r  s     r/   r  r    s     d./"4((r1   check_errorsc                    t        | d       t        | d       | j                  }t        |      }t	        |d      }| j                  |      }|j                  ||       | j                  |d|dz
   t        j                        }||fS )Nzlinalg.choleskyFr   r   r{   )	r   r   r   r   r   r   r  rB   r  )r  r  r  A_shaper   	L_stridesr  infoss           r/   linalg_cholesky_exr    s    a*+1/0ggGw<D ,GU;I	GAMM'9% KKD1H-U[[KAEe8Or1   tauc                 @    t        j                   j                  dk\  d        t        j                   j                  d       j                  d      k\  d        t        j                   j                  d      j                  d      k\  d        t        j                   j                  j                  z
  dk(   fd        j                  dkD  r: j                  d d }j                  d d t        j                  |k(  fd	       t        j                  j
                   j
                  k(   fd
       t        d d       t        j                   j                  t         j                  d       j
                   j                        S )Nr   c                       y)NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r5   r5   r1   r/   rQ   z,linalg_householder_product.<locals>.<lambda>  rY   r1   r  r   c                       y)Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r5   r5   r1   r/   rQ   z,linalg_householder_product.<locals>.<lambda>  rY   r1   c                       y)Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r5   r5   r1   r/   rQ   z,linalg_householder_product.<locals>.<lambda>  rY   r1   r   c                  <    dj                    d j                    S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r   r  s   r/   rQ   z,linalg_householder_product.<locals>.<lambda>  '    )),
2Nuzzl\ r1   c                      d  S )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r5   actual_batch_tau_shapes   r/   rQ   z,linalg_householder_product.<locals>.<lambda>      66L5MO r1   c                  <    dj                    d j                    S )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r{   r  s   r/   rQ   z,linalg_householder_product.<locals>.<lambda>  s#    :399+*5;;-9 r1   z torch.linalg.householder_productr  Fr  r   r   rJ   rn   )
rB   rS   r   r   r   rJ   r  empty_stridedr   rn   )r   r  expected_batch_tau_shaper  s   `` @r/   linalg_householder_productr    sK   
 
LL

aZ 
LL

2%**R.(t 
LL

2#((2,&r
 
LL

SXX"	
 zzA~#(;;s#3 !$3B"&>>	
 
LL		U[[ 	
 6UEJ[[*5;;%Hkk||	 r1   c                 2   t        | d       t        | dd       | j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }||fS )Nzlinalg.inv_exF)r  r  r  r{   r   r   r   r   r  r   rB   r  )r  r  r  r  s       r/   linalg_inv_ex_metar  	  sq    a)1o%P	AGGAMM!''6qww%PQKKEKKK8Ee8Or1   LDpivotsinfo)	hermitianr  r  c                   t        | d       t        | d       t        j                  | j                  t        | j                  d      | j                  | j                        }| j                  | j                  d d t        j                        }| j                  | j                  d d t        j                        }|||fS )Nztorch.linalg.ldl_factor_exFr  r  r   r{   r  )
r   r   rB   r  r   r   rJ   rn   r   r  )r   r  r  r  r  r  s         r/   linalg_ldl_factor_ex_metar    s     d894!=>			ZZ*4::Gjj{{	
B ^^DJJsO599^=F>>$**Sb/>;Dvtr1   )r  c                d    t         d       t         d       t         d       t        j                  j
                  dk\  fd        j                  d d }t        j                  |j                  k(  fd       t        j                  t        j                  j                        fd       t        j                   j                  j                  k(   fd       t               \  }}t        j                  |t        |d	      j                  j                  
      S )Nztorch.linalg.ldl_solver   c                  $    d j                    dS )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  )r  s   r/   rQ   z'linalg_ldl_solve_meta.<locals>.<lambda>8      &&!46 r1   r   c                  $    d j                    dS )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr   r  s   r/   rQ   z'linalg_ldl_solve_meta.<locals>.<lambda>@      ))/h@ r1   c                  "    d j                    S )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r{   r  s   r/   rQ   z'linalg_ldl_solve_meta.<locals>.<lambda>G  s    Nv||n] r1   c                  <    dj                    d j                    S )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r{   )r  r  s   r/   rQ   z'linalg_ldl_solve_meta.<locals>.<lambda>K  s"    3BHH:=UVWV]V]U^_ r1   Fr  r  )r   r   r  rB   rS   r   r   r:   is_integer_dtyperJ   _linalg_broadcast_batch_dimsr  r   rn   )r  r  r  r  expected_pivots_shapeB_broadcast_sizer=   s   ```    r/   linalg_ldl_solve_metar  *  s     b232781b":;	LL	!	
 HHSbM	LL-	
 
LLv||,] 
LL
AGG_ 7q"=a*+;uMggxx	 r1   Pr  )pivotr  c                h    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }||d<    j                  |      }||d<   ||d<    j                  |      }|||fS )Nr   c                  $    d j                    dS )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r  r  r  s   r/   rQ   z linalg_lu_meta.<locals>.<lambda>[  s    RSTSZSZR[[cd r1   r  r   r   )rB   rS   r   r   r   r|  r   )	r  r  sizesr2  r   r1  r  r  r  s	   `        r/   linalg_lu_metar  V  s     
LL	!d
 MEb	Ab	AAq	AE"IKKKKE"I	EAE"IE"I	EAa7Nr1   LU)r  r  c                    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        j
                  |t        |d       j                   j                        }|j                          t        ||      |d<    j                  |t         j                        }|j                           j                  |t         j                        }|||fS )	Nr   c                  $    d j                    dS )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r  r  r  s   r/   rQ   z*linalg_lu_factor_ex_meta.<locals>.<lambda>|  s    XYZY`Y`Xaaij r1   r  r   Fr  r  r{   )rB   rS   r   r   r   r  r   rJ   rn   r  r|  r   r  )	r  r  r  r  r2  r   r  r  r  s	   `        r/   linalg_lu_factor_ex_metar  r  s     
LL	!j
 MEb	Ab	A			*5EBggxx	
B 
IIKAq	E"I[[eii[0F 
IIK;;uEII;.Dvtr1   )r   adjointr  c                    t         d       t        j                   j                  j                  k(   fd       t        j                  j                  t        j                  k(  d        t         d       t         |d       t        j                   j                  d      j                  d      k(  d        t        j                   j                  d d j                  k(  fd       t               \  }}t        j                  |t        ||       j                  j                  	      }|j                         d
k7  r"|s |j                         r|j                         }|S )Nztorch.linalg.lu_solvec                  >    dj                    d j                    dS )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r  r{   )r  r  s   r/   rQ   z&linalg_lu_solve_meta.<locals>.<lambda>  s(    $$&HH:_QWWIXO r1   c                       y)NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r5   r5   r1   r/   rQ   z&linalg_lu_solve_meta.<locals>.<lambda>  rY   r1   zlinalg.lu_solver   c                       y)NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr5   r5   r1   r/   rQ   z&linalg_lu_solve_meta.<locals>.<lambda>  rY   r1   c                  $    d j                    dS )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r  r  r  s   r/   rQ   z&linalg_lu_solve_meta.<locals>.<lambda>  r	  r1   r  r  r   )r   rB   rS   rJ   r  r   r  r   r   r  r  r   rn   r   r   conj)r  r  r  r   r  r  r=   r  s   ```     r/   linalg_lu_solve_metar#    s.    267	LL
AGG	
 
LL		!W b12b!T#45	LL
v{{2&k 
LL
"%	
 7q"=a  *+;4xPggxx	F ||~4[[]FMr1   unpack_dataunpack_pivotsc                     t        j                   j                  dk\   fd       |r2t        j                  |j                  t         j                  k(  d        t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }|r2||d<    j                  |      }	||d<   ||d<    j                  |      }
n$ j                  dg      }	 j                  dg      }
||	|
fS )Nr   c                  $    d j                    dS )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r  r  )r  s   r/   rQ   z lu_unpack_meta.<locals>.<lambda>  s    XY[YaYaXbbjk r1   c                       	 y)Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr5   r5   r1   r/   rQ   z lu_unpack_meta.<locals>.<lambda>  s    p r1   r  r   r   )	rB   rS   r   rJ   r  r   r   r|  r   )r  r  r$  r%  r  r2  r   r1  r  r  r  s   `          r/   lu_unpack_metar)    s     
LL
1k LLEKK'	
 NEb	Ab	AAq	AE"ILLLL!b	LLb	b	LLLL!LL!a7Nr1   modec                       dk(  rd}d}||fS  dk(  rd}d}||fS  dk(  rd}d}||fS t        j                  d fd       fS )NreducedTcompleteFrc                      d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r5   )r*  s   r/   rQ   z _parse_qr_mode.<locals>.<lambda>  s    1$ 8N O r1   rB   rS   )r*  	compute_qr,  s   `  r/   _parse_qr_moder2    s    y	 g 
		 g 
	 g 		
 gr1   QRc                    t        | d       t        | d       t        |      \  }}| j                  d   }| j                  d   }t	        ||      }|rMt        | j                        }|r|n||d<   | j                  |      }|j                  |t        |d             n| j                  dg      }t        | j                        }	|s|s|n||	d<   | j                  |	      }
|
j                  |	t        |	d             ||
fS )Nz	linalg.qrr  r   Fr  r   )	r  r   r2  r   r|  r   r   r  r   )r  r*  r1  reduced_moder2  r   r1  Q_shaper3  R_shaper4  s              r/   linalg_qr_metar9    s     ![!1k*,T2I|	A	AAq	Aqww-'aQKK 	g:7eTUKK 177mG#9!!GBK	GAMM'6w%PQa4Kr1   sign	logabsdetc                    t        | d       t        | dd       | j                  }| j                  |d d       }| j                  |d d t	        | j
                              }t        j                  |t        |d      | j
                  | j                        }| j                  |d d t        j                        }||||fS )Nzlinalg.slogdetFr  r{   r  r   )r   r   r   r   rL   rJ   rB   r  r   rn   r  )r  r   r:  r;  r  r  s         r/   _linalg_slogdetr=  *  s     a)*1.6GGE;;uSbz"DE#2Joagg.FGI			*5%8ggxx	
B [[s5;;[7FB&&r1   full_matrices
compute_uvdriverc                 b   t        | d       t        | d       t        | j                  d d       }| j                  d   }| j                  d   }t	        ||      }|r|||r|n|gz   }| j                  |      }	|	j                  |t        |d             ||r|n||gz   }
| j                  |
      }t        |       dk(  }|j                  |
t        |
|             n$| j                  dg      }	| j                  dg      }| j                  ||gz   t        | j                              }|	||fS )	Nz
linalg.svdr  r   Fr  cudar   r{   )r  r   r   r   r|  r   r  r   device_hintrL   rJ   )r  r>  r?  r@  r   r2  r   r1  U_shaper  V_shapeVis_cudaSs                 r/   _linalg_svd_metarI  >  s#    !\"1l+aggcrl#J	A	AAq	A11==KK 	g:7eTU]1==KK 
 a.F*	g:7gVW KKKK 	
J!$OAGG,DEAa7Nr1   arg1arg2c                    | j                   d d }|j                   d d }t        ||      }t        |      }|| j                  d      | j                  d      gz  }t        |      }||j                  d      |j                  d      gz  }||fS )Nr  r   )r   r#   r   r   )rJ  rK  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes          r/   r  r  d  s    
 zz#2zz#2,-=?OP012		"66012		"66---r1   c                     |rt        | ||       t        | |      \  }}|| j                  k(  r| n| j                  |      }||j                  k(  r|n|j                  |      }||fS r,   )r  r  r   expand)rJ  rK  r  rP  rQ  arg1_broadcastedarg2_broadcasteds          r/   r  r  u  sv     tT40)EdD)Q&& !DJJ.DKK@P4Q  !DJJ.DKK@P4Q  ---r1   r   c                     | j                   d d }|j                  dk(  xs- | j                  dz
  |j                  k(  xr |j                   |k(  }|S )Nr   r   )r   r   )r   r   expected_batched_rhs_shapevector_cases       r/   linalg_solve_is_vector_rhsrY    sS    !&Sb!1**/ 

Q%**$R8R)R  r1   )r   r  r  r  r  r  c                    t         d       t        j                   j                  j                  k(   fd       t	               }|rj                  d      n}	t         |	|d       t        |	       \  }
}t        j                  |xs | d        |r|
d d n|
}t        j                  |t        ||       j                  j                        } j                  } j                  }t        j                  |t        |d       j                   j                        } j                  |d d t        j                        } j                  |d d t        j                        }||||f}||||f}t        d	 |D              rbt!        ||      D ]S  \  }}t#        ||j                         |j%                  |j                  |j'                                t)        ||d
       U |S )Nzlinalg.solvec                  >    d j                    dj                    dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r  r  r{   )r  r  s   r/   rQ   z"_linalg_solve_ex.<locals>.<lambda>  s%    Ywwiqwwix9 r1   r   c                       	 y)Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r5   r5   r1   r/   rQ   z"_linalg_solve_ex.<locals>.<lambda>  s    K r1   r  Fr{   r  c              3   $   K   | ]  }|d u 
 y wr,   r5   )r^   r?   s     r/   r`   z#_linalg_solve_ex.<locals>.<genexpr>  s     
&Q1D=
&s   )	copy_fromcopy_toexact_dtype)r   rB   rS   rJ   rY  	unsqueezer  r  r  r   rn   r   r   r   r  allzipr   r  r   r!   )r  r  r   r  r  r  r  r  rX  B_B_broad_shaper=   result_shaperesult_r   r   LU_pivots_info_r   resr.  os   ``                     r/   _linalg_solve_exrm    s    1n-	LL	177	
 -Q2K'RQBaT>23B:M1	LLK	
 *5="%-L!!*<TBggxx	G GGE66D


*5%8ggxx	C kk%*EKKk8GKKcr
%++K6E2vt
$CC%
(C

&#
&&SM 	FDAqa)MM!''188:.QuE	F Jr1   )r   unitriangularr   rn  r   c                   || j                  dg      }t        |t              sJ t        | ||d       t	        || d       \  }}|j                  dd      j                         xr |j                         }|rt        ||j                        }|S t        ||j                        r=|j                  |j                  dd      j                         |j                  dd       |S )Nr   zlinalg.solve_triangularr  r   )r   r\   r   r  r  r  r.  is_conjr   r   r    resize_
transpose_)	r  r  r  r   rn  r   rd  A_avoid_copy_As	            r/   linalg_solve_triangular_metaru    s     {kk1#c:&&&aD";<.q!T:FB<<B'557HBJJLLRXX. J  RXX.KKR,223NN2r"Jr1   XM)r`  r  c                     t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d       j                  t         j
                  k(  rt               \  }}t        j                  |t        |d       j                   j                        }t        j                  |t        |d      j                  j                        }||fS j                  t         j                  k(  sj                  t         j                  k(  r+t        j                         } j                  dg      }||fS t        j                  dd	        fS )
Nr   c                  $    d j                    dS )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r   s   r/   rQ   z'triangular_solve_meta.<locals>.<lambda>  s    ))$79 r1   c                  $    d j                    dS )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r  s   r/   rQ   z'triangular_solve_meta.<locals>.<lambda>  r  r1   triangular_solveFr  r  r   c                       y)Nz+triangular_solve: Got an unexpected layout.r5   r5   r1   r/   rQ   z'triangular_solve_meta.<locals>.<lambda>  rY   r1   )rB   rS   r   r  rm   stridedr  r  r   rJ   rn   
sparse_csr
sparse_bsrr   r   )	r   r  r  r  rn  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients	   ``       r/   triangular_solve_metar    sL    
LL		Q	
 
LL	!	
 4$67xx5== 0LTST0U--&&$./BeT**;;	
 #00!./?5Q''88	
 ''' 
U%%	%U5E5E)E##D)!^^QC0 ''' 	UQR'''r1   c                 l   t        | d       t        | d       | j                  | j                  d d       }| j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }|||fS )Nz
linalg.detr  Fr  r   r{   r  )r  detr  r  s       r/   _linalg_det_metar    s    a&1l+
++aggcrl
#C	
QWW	BNN17775QR[["U[[[9FF?r1   c                 0    t        j                   j                  dk\  d        t        j                  j                  dk\  d        |rdndt        j                  j                     j                  d   k\  fd       t        j                  j                      j                  d   k(  fd       t        j                  j                  d    j                  d   k  d        t        j                   j                  j                  z
  d	k(   fd
       t        j                   j                  j                  k(   fd        j                  dkD  re j                  d d }j                  d d t        j                  |k(  fd       j                  d d t        j                  |k(  fd       t        j                  j                   j                  k(   fd       t        j                  j                   j                  k(   fd       t        d d       t        d d       t        j                  j                  t        j                  d      j                  j                        S )Nr   c                       y)Nz3torch.ormqr: input must have at least 2 dimensions.r5   r5   r1   r/   rQ   zormqr.<locals>.<lambda>0  rY   r1   c                       y)Nz3torch.ormqr: other must have at least 2 dimensions.r5   r5   r1   r/   rQ   zormqr.<locals>.<lambda>3  rY   r1   r  r   c                      d  dS )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r5   left_size_conditions   r/   rQ   zormqr.<locals>.<lambda>9  s    +,?+@@pq r1   c                      d  dS )Nr  z"] must be equal to input.shape[-2]r5   r  s   r/   rQ   zormqr.<locals>.<lambda>=  s    +,?+@@bc r1   c                       y)NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r5   r5   r1   r/   rQ   zormqr.<locals>.<lambda>B  rY   r1   r   c                  <    dj                    d j                    S )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r  r  r  s   r/   rQ   zormqr.<locals>.<lambda>G  r  r1   c                  <    dj                    d j                    S )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r  r  r   r   s   r/   rQ   zormqr.<locals>.<lambda>N  s+    ++0::,6RSXS]S]R^` r1   c                      d  S )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r5   r  s   r/   rQ   zormqr.<locals>.<lambda>Y  r  r1   c                      d  S )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r5   )actual_batch_other_shapes   r/   rQ   zormqr.<locals>.<lambda>b  s    66N5OQ r1   c                  <    d j                    dj                    S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r{   r  s   r/   rQ   zormqr.<locals>.<lambda>j  s'    ##(;;-/B399+O r1   c                  <    d j                    dj                    S )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r{   r  s   r/   rQ   zormqr.<locals>.<lambda>q  s'    ##(;;-/DU[[MS r1   ztorch.ormqrr  r   Fr  r  )	rB   rS   r   r   rJ   r  r  r   rn   )	r   r  r   r   r  expected_batch_shaper  r  r  s	   ```   @@@r/   ormqrr  &  s    
LL

aV 
LL

aV !%""	LL'(CIIbM9q 
LL'(EKKO;c
 
LL		"R(Z
 
LL

SXX"	
 
LL

ejj 	
 zzA~${{3B/!$3B"&::	
 $);;s#3 $(<<	
 
LL		U[[ 	
 
LLu{{"	
 M3u5M5%9[[*5;;%Hkk||	 r1   c                t    t        j                  t              dz  k(  fd        j                  }|dz   k(  }|}| }|r*t	        d|      D ]  }|xr  j                  |      dk7  } n)t	        d|      D ]  }|xr  j                  |      dk7  } t        j                  |xs | fd       y )Nr   c                  ,    dd z   dt               S )Nzpadding size is expected to be r   z, but got: r   )rq   paddings   r/   rQ   z,_padding_check_valid_input.<locals>.<lambda>  s    1!c'+c'l^T r1   r   r   c                  :    d dz    d dz    dj                    S )Nz	Expected r   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r  )rq   r   s   r/   rQ   z,_padding_check_valid_input.<locals>.<lambda>  s2    aycAgY /AAFO r1   )rB   rS   r   r   r   r   )r   r  rq   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   s   ```     r/   _padding_check_valid_inputr    s    	LLGCT
 

I#'*M$,,q)$ 	GA/FEJJqMQ4F	G q)$ 	OA#7#NEJJqMQ<N 	O 
LL00	
r1   c                   	
 d}dd} j                   dk(  r j                  d      }dz  |dz  }t         |d       |\  	
 j                  |      } j                        	z   
z   |r&t        j                  	k  xr 
k   	
fd       t        j                  dk\  fd        j                   dk(  r j                  |f      S  j                  ||f      S )Nr   r   r   r   c                  4    d d d  dj                    S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (rd   ) at dimension 
 of input r  dim_wr   pad_lpad_rs   r/   rQ   z_pad1d_common.<locals>.<lambda>  2    %%*G2eWOE7*UZU`U`Tac r1   c                      d  d S )Nz
input (W: z%) is too small. Calculated output W: r5   )input_woutput_ws   r/   rQ   z_pad1d_common.<locals>.<lambda>  s    *WI%J8*U r1   r   )r   r   r  rB   rS   r   )r   r  is_reflection	dim_planenbatchnplaner  r  r  r  r  s   `     @@@@@r/   _pad1d_commonr    s    IEFzzQA
Q	ug15LE5ZZ	"FjjG&HGO/	
 
LLAU
 zzQ1229::r1   c                     t        | |d      S NTr  r  r   r  s     r/   meta_reflection_pad1dr         t<<r1   c                     t        | |d      S NFr  r  r  s     r/   meta_replication_pad1dr         u==r1   c                    d|s#t        j                  t        |      dk(  d        j                  dk(  rdz  |\  j	                        }|z   z   |r&t        j                  |k  xr |k  fd       t        j                   j	                        k(   fd       j                  j                        S )Nr   r   c                       y)Nz padding size is expected to be 2r5   r5   r1   r/   rQ   z(_pad1d_backward_common.<locals>.<lambda>  rY   r1   r   c                  4    d d d  dj                    S r  r  r  s   r/   rQ   z(_pad1d_backward_common.<locals>.<lambda>  r  r1   c                  2    d dj                          S Nz(grad_output width unexpected. Expected: , Got: r   r  grad_outputr  s   r/   rQ   z(_pad1d_backward_common.<locals>.<lambda>  "    :8*GKL\L\]bLcKde r1   rB   rS   r   r   r   r   r   )	r  r   r  r  r  r  r  r  r  s	   ``   @@@@r/   _pad1d_backward_commonr    s    ES\Q&(RSzzQ
LE5jjG&HGO/	
 
LLK$$U++e
 ??5;;''r1   
grad_inputc                      t        | ||d      S r  r  r  r   r  s      r/   meta_reflection_pad1d_backwardr    s     "+ugTRRr1   c                      t        | ||d      S r  r  r  s      r/   meta_replication_pad1d_backwardr    s     "+ugUSSr1   c                   	
 ddd}d}t         |d        j                  }|dk(  r  j                  d      }dz  dz  |dz  }|\   j                  |      } j                        	 j                        
	z   z   
z   z   |rLt        j                  
k  xr 
k   fd       t        j                  	k  xr 	k   fd       t        j                  dk\  xs dk\  	
fd        j                  d	k(  r j                  |f      S  j                  ||f      S )
Nr   r   r   r      c                  4    d d d  dj                    S r  r  r  s   r/   rQ   z_pad2d_common.<locals>.<lambda>  r  r1   c                  4    d d d  dj                    S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (rd   r  r  r  dim_hr   pad_bpad_ts   r/   rQ   z_pad2d_common.<locals>.<lambda>  r  r1   c                       d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r5   )input_hr  output_hr  s   r/   rQ   z_pad2d_common.<locals>.<lambda>&  s*    	gY /$$,:T(= r1   r   r  r   r   rB   rS   r   )r   r  r  
dim_slicesr  r   r  r  r  r  r  r  r  r  r  r  r  s   `      @@@@@@@@@@r/   _pad2d_commonr    sU   EEJFug15::DqyA

a
!(E5%ZZ
#FjjGjjG&H&HGO/	
 	GO/	
 
LLA&Q	
 zzQ(;<<(CDDr1   c                     t        | |d      S r  r  r  s     r/   meta_reflection_pad2dr  2  r  r1   c                     t        | |d      S r  r  r  s     r/   meta_replication_pad2dr  8  r  r1   c                     ddd}d}|j                   }|j                         dk(  r|d   }dz  dz  |dz  }|\  }}}}	||   }
|   }|   }||z   |	z   ||z   |z   t        j                   j	                        k(   fd       t        j                   j	                        k(   fd       |j                  |j                         S )Nr   r   r   r  c                  2    d dj                          S r  r   r  s   r/   rQ   z%meta_pad2d_backward.<locals>.<lambda>^  r  r1   c                  2    d dj                          S Nz)grad_output height unexpected. Expected: r  r   r  r  r  s   r/   rQ   z%meta_pad2d_backward.<locals>.<lambda>b  "    ;H:W[M]M]^cMdLef r1   )r   rq   rB   rS   r   r   )r  r   r  r  r  rP   r  r  r  r  r  r  r  r  r  r  r  s   `            @@@@r/   meta_pad2d_backwardr  >  s     EEIFJxxzQA

Q	!(E5%	"FGG&H&H	LLK$$U++e 
LLK$$U++f >>$**%%r1   c          	      $   	
 d	ddd}t         |d        j                  dk(  }|r% j                  d      }	dz  	dz  dz  |dz  }|\   j                  |      } j                        
 j                         j                  	      
z   z   z   z   z   z   |rrt        j                  k  xr k  	 fd       t        j                  k  xr k   fd       t        j                  
k  xr 
k   fd	       t        j                  dk\  xs dk\  xs dk\  
fd
       |r j                  |f      S  j                  |f      S )Nr   r   r   r   r      c                  4    d d d  dj                    S r  r  r  s   r/   rQ   z_pad3d_common.<locals>.<lambda>  r  r1   c                  4    d d d  dj                    S r  r  r  s   r/   rQ   z_pad3d_common.<locals>.<lambda>  r  r1   c                  4    d d d  dj                    S )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (rd   r  r  r  )dim_dr   pad_bkpad_fs   r/   rQ   z_pad3d_common.<locals>.<lambda>  s2    %%*G2fX_UG:V[VaVaUbd r1   c                  ,    d  d d d d d S )Nz
input (D:  H: r  z%) is too small. Calculated output D: r5   )input_dr  r  output_dr  r  s   r/   rQ   z_pad3d_common.<locals>.<lambda>  s7    	gYd7) <$$,:T(4zK r1   r  )r   r  r  r  
batch_moder  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s   `      @@@@@@@@@@@@@@@r/   _pad3d_commonr  g  s   EEEIug15qJA


Q	07-E5%vZZ	"FjjGjjGjjG'H&H&HGO/	
 	GO/	
 	GO0 0	
 
LLA7Q7(a-	
 	
 (HMNN(HEFFr1   c                     t        | |d      S r  r  r  s     r/   meta_reflection_pad3dr     r  r1   c                     t        | |d      S r  r  r  s     r/   meta_replication_pad3dr    r  r1   c                     t        j                  t        |      dk(  d        |j                  dkD  sJ  j                  |j                  k(  sJ ddd|j                  dk(  rdz  dz  dz  |\  }}}}}}|j	                        }	|j	                        }
|j	                        }|	|z   |z   |
|z   |z   ||z   |z   t        j                   j	                        k(   fd       t        j                   j	                        k(   fd       t        j                   j	                        k(   fd	       |j                  |j                        S )
N   c                       y)Nz padding size is expected to be 6r5   r5   r1   r/   rQ   z%meta_pad3d_backward.<locals>.<lambda>  rY   r1   r   r   r   r  c                  2    d dj                          S r  r   r  s   r/   rQ   z%meta_pad3d_backward.<locals>.<lambda>  r  r1   c                  2    d dj                          S r  r   r  s   r/   rQ   z%meta_pad3d_backward.<locals>.<lambda>  r  r1   c                  2    d dj                          S )Nz(grad_output depth unexpected. Expected: r  r   )r  r  r  s   r/   rQ   z%meta_pad3d_backward.<locals>.<lambda>  r  r1   r  )r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s   `           @@@@@@r/   meta_pad3d_backwardr	    s_    
LLW"$NO::>>uzz)))EEEzzQ


07-E5%vjjGjjGjjG'H&H&H	LLK$$U++e 
LLK$$U++f 
LLK$$U++e
 ??5;;''r1   pc                 J   t        j                  | j                         d        | j                  d      }|dk  r0| j	                  dg      j                  t         j                        S | j	                  ||dz
  z  dz  f      j                  t         j                        S )Nc                       y)Nz(_pdist_forward requires contiguous inputr5   r5   r1   r/   rQ   z%meta__pdist_forward.<locals>.<lambda>  rY   r1   r   r   r   r   )rB   rS   r.  r   r   r   r  )r   r
  r   s      r/   meta__pdist_forwardr    s     
LLP 			!AAv~~qc"%%E4R4R%SS~~qAE{a/125588 6 
 	
r1   gradpdistc                     t        j                  |j                         d        t        j                  |j                         d        t        j                  |t         j                        S )Nc                       y)Nz._pdist_backward requires self to be contiguousr5   r5   r1   r/   rQ   z&meta__pdist_backward.<locals>.<lambda>  rY   r1   c                       y)Nz/_pdist_backward requires pdist to be contiguousr5   r5   r1   r/   rQ   z&meta__pdist_backward.<locals>.<lambda>  rY   r1   r   )rB   rS   r.  r   r  )r  r   r
  r  s       r/   meta__pdist_backwardr    sW     
LLV 
LLX D0N0NOOr1   )r  r  c                   	
 j                  d      }j                  d      }j                  d      } j                  |||f       t        j                  j	                         dk(  d        t        j                  j	                         dk(  d        t        j                   j
                  j
                  cxk(  xr j
                  k(  nc  fd       j                  }j                  	|d   
|d   t        j                  	d   
k(  xr 	d   k(  	
fd        j                   j                               S )	Nr   r   r   r   c                       yNzbatch1 must be a 3D tensorr5   r5   r1   r/   rQ   zmeta_baddbmm.<locals>.<lambda>  rY   r1   c                       yNzbatch2 must be a 3D tensorr5   r5   r1   r/   rQ   zmeta_baddbmm.<locals>.<lambda>  rY   r1   c                  V    dj                    d j                    dj                    S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r{   )batch1batch2r   s   r/   rQ   zmeta_baddbmm.<locals>.<lambda>  s0    =djj\TZT`T`Saaklrlxlxkyz r1   c            	      .    d d d d    d d    d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [rd   z] but got: [r   r   ].r5   batch2_sizesbscontraction_sizes   r/   rQ   zmeta_baddbmm.<locals>.<lambda>  s:    t2&'|LO3DB|TUFWWY[ r1   )r   rS  rB   rS   rq   rJ   r   r   )r   r  r  r  r  dim1dim2dim3batch1_sizesr   r!  r"  s   ```      @@@r/   meta_baddbmmr'    s    ;;q>D;;q>D;;q>D;;dD)*D	LL"$HI	LL"$HI	LL

fll2fll2z <<L<<L	aB#A	LLQ2E,q/5E"E	
 >>$))+&&r1   c                H    t        j                  |       j                         S r,   rB   r   r   r   r   s     r/   meta_bernoullir+    s     D!,,..r1   c                     | S r,   r5   r   r
  r   s      r/   meta_bernoulli_r.        Kr1   c                 H    t        j                  |       j                         S r,   r)  r-  s      r/   meta_bernoulli_pr1  $  s     D!,,..r1   c                 ,    t        j                  |       S r,   rB   r   r*  s     r/   meta_poissonr4  *  s     D!!r1   c                     t        j                  |
| j                         k  d        t        j                  | t         j                        }t        j                  |       |fS )Nc                       y)NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r5   r5   r1   r/   rQ   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>B  rY   r1   r{   )rB   rS   rq   r   bool)r   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmasks                 r/   $meta__fused_moving_avg_obs_fq_helperrE  0  sO      
LL$((*\ D

3DT"D))r1   c                 H   t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                  \  |j                  \  t        j                  k(  fd       | j	                        S )Nr   c                       y)Nza must be 2Dr5   r5   r1   r/   rQ   zmeta_mm.<locals>.<lambda>K  rY   r1   c                       y)Nzb must be 2Dr5   r5   r1   r/   rQ   zmeta_mm.<locals>.<lambda>L  rY   r1   c            	      "    d d  d d d	S )Nz/a and b must have same reduction dim, but got [rd   z] X [r  r5   )M1M2Nr  s   r/   rQ   zmeta_mm.<locals>.<lambda>Q  s(    A!Brd%PRtSUVWUXXZ[ r1   )rB   rS   rq   r   r   )abrJ  rK  rL  r  s     @@@@r/   meta_mmrO  H  sz     
LLA56	LLA56GGEArGGEB	LL
b[ ;;q!r1   c                      |r(t         fdt         j                        D              S t        j                   j
                        S )Nc              3   H   K   | ]  }|vrj                   |   nd   yw)r   Nr  )r^   r   dimsr   s     r/   r`   z+_compute_reduction_shape.<locals>.<genexpr>X  s$     UqatmTZZ]:Us   ")rR   r   r   r:   compute_reduction_output_shaper   )r   rR  rZ  s   `` r/   rX  rX  V  s7    UE$))DTUUU//

DAAr1   c                    t        | t        j                  j                        r| j                  j
                  S t        | d      rEt        | j                  d      r/| j                  j
                  dk7  r| j                  j
                  S y)Nrn   rf   rl   rB  )r\   rB   _subclasses
FakeTensorfake_devicerf   hasattrrn   )r  s    r/   rC  rC  a  sg    &%++667!!&&&!FMM6*MM&(}}!!!r1   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                    dt         dt         dt         dt         dt         dt         fd}dt         dt         dt         dt         dt         dt         dt         fd	}	|j                  d
d  }
| j                  d
d  }|r||j                  d   z  }n<|j                  d   }|j                  d   |z  | j                  d   k7  rt        d      | j                  d   |g}t        |t              r|gt        |      z  }n t        |      dk(  r|d   gt        |      z  }t        |t              r|gt        |      z  }n t        |      dk(  r|d   gt        |      z  }t        |t              r|gt        |      z  }n t        |      dk(  r|d   gt        |      z  }d }|rCt        |t              r|gt        |      z  }n#t        |      dk(  r|d   gt        |      z  }n|}t        t        |            D ]]  }|r/|j                   |	||   ||   ||   |
|   ||   ||                4|j                   |||   ||   ||   |
|   ||                _ |S )Nlnr
  r   r1  rr  r8  c                 6    | d|z  z   ||dz
  z  z
  dz
  |z  dz   S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r   r5   )r_  r
  r   r1  rr  s        r/   _formulaz+calc_conv_nd_return_shape.<locals>._formulax  s.     QU
Q!a%[(1,2Q66r1   r-   c                 <    | dz
  |z  d|z  z
  ||dz
  z  z   |z   dz   S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r   r   r5   )r_  r
  r   r1  rr  r-   s         r/   _formula_transposedz6calc_conv_nd_return_shape.<locals>._formula_transposed  s2    " Q!|a!e#a1q5k1B6::r1   r   r   r   zInvalid channel dimensions)r  r   r   r\   r   r   r   r   )rY  r  r   r  rZ  r[  r\  r]  ra  rc  kernel_sizerR  out_channels	ret_shapeoutput_padding_listr   s                   r/   calc_conv_nd_return_shaperh  n  s   7S 7S 7S 7S 7S 7S 7"; ; ; ; ; ; ;QT ;& ,,qr"Kab!DQ/||A<<?V#|'9'9!'<<;<<##A&5I&'"CI%	V	)s4y('7#)c$i'	W	1:,T*(G$:D	)	X!	QK=3t9,/3ng.#1"2SY"> A%#1!#4"5D	"A"03t9 #GAJQKN1I'*	 a'!*hqk;q>6RS9U$ r1   c                 b    t         j                  j                  |       t         j                  k(  S r,   rB   _prims_commonsuggest_memory_formatchannels_lasttens    r/   is_channels_lastrp    s$    44S9U=P=PPPr1   running_meanrunning_vartrainingexponential_average_factorepsilonc                 r     j                   }||j                   n|j                   }	||j                   n|j                   }
 fd} j                  |      j                   |             }|r# j                  |	      } j                  |
      }n" j                  d      } j                  d      }|||fS )Nc                      t               rt        j                  S  j                  t        j                        rt        j                  S t        j                  S r:  )rp  rB   rm  r.  r   )rY  s   r/   pick_memory_formatz2meta_miopen_batch_norm.<locals>.pick_memory_format  sF    L)&&&%%E4K4K%L***&&&r1   r   r   )r   r   r   )rY  r  r  rq  rr  rs  rt  ru  r   save_mean_shapesave_var_shaperx  r   	save_meansave_vars   `              r/   meta_miopen_batch_normr~    s     ""I -9,Dl((&,,O*5*A[&&v||N' 
 
 
+
.
.=O=Q
.
RC **?;	)).9 **40	))$/	8##r1   c	           
            fd}	t         ||||||r|nd       }
d}d} j                  |      dk(  rd|
|<    j                  |
      }|j                   |	             }|S )Nc                  d   t               dk(  r&t               st              r+t        j                  S t               rt        j                  S  j	                  t        j
                        rt        j
                  S  j	                  t        j                        rt        j                  S y NrB  r   )rC  rp  rB   rm  r.  r   preserve_format)rY  r  s   r/   rx  z%meta_conv.<locals>.pick_memory_format		  s    |$.-1A&1I***-***%%E4K4K%L***''e6K6K'L((( Mr1   r   r   r   )rh  r   r   r   )rY  r  r  r   r  rZ  r[  r]  r\  rx  	shape_outinput_channels_dimoutput_channels_dimr   s   ``            r/   	meta_convr    s    
) *'T	I +,1)*	%&

 
 
+C
&&13&
4CJr1   mkldnnc
           
          t        | ||||d|g       }
| j                  |
      }t        j                  }| j	                         dk(  rt        j
                  }|j                  |      }|S )NFr  r   )rh  r   rB   rm  rq   channels_last_3dr   )rY  r  r  r  r   rZ  r\  attrscalars	algorithmr  r   out_memory_formats                r/   meta_mkldnn_convolution_defaultr  /	  sp     .&&'8UFB
	 $$Y/!//" % 6 6ff#4f5
r1   c                 b    | j                  g | j                  d d |j                  d         S Nr   r   r   r   )rY  r  r  r  r  r  s         r/   meta_linear_pointwise_defaultr  F	  s5     %%&Q(:(:3B(?&Qa&QRRr1   mklc                 b    | j                  g | j                  d d |j                  d         S r  r  )rY  packed_weightorig_weightr  r   s        r/   meta_mkl_linearr  Q	  s:    ))@,$$Sb)@;+<+<Q+?@ r1   onednnc           
          t        | ||||	d|
d       }|t        j                  t        j                  fv sJ | j	                  ||      }|j                  t        j                        }|S )NFr{   r   )rh  rB   r*  r,  r   r   rm  )r?   x_scalex_zpww_scalew_zpr  r   r  rZ  r\  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   s                      r/   meta_qconv2d_pointwiser  [	  sp    ( .	
	 u~~>>>>kk)<k8ff5#6#6f7
r1   c                     t        | j                        }|j                  d   |d<   |	t        j                  t        j                  fv sJ | j                  ||	      }|S )Nr   r   r{   )r   r   rB   r*  r,  r   )r?   r  r  r  r  r  r  r  r  r  post_op_namepost_op_argspost_op_algorithmr3  r   s                  r/   meta_qlinear_pointwiser  ~	  sU    " AGG}771:Ru~~>>>>kk,lk;
r1   c                 v    t        | j                        }|j                  d   |d<   | j                  |      }|S )Nr   r   )r   r   r   )r?   r  r  r3  r   s        r/   meta_linear_dynamic_fp16r  	  s6     AGG}771:Rkk,'
r1   	quantizedr   c                 .   t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  }
| j                         dk(  r|||g}n|	|||g}t        j
                  || j                  | j                  |
      S Nr  r   r   rJ  )#max_pool2d_checks_and_compute_shaperq   r   rB   rm  rv   rJ   rn   r   rd  r   r  rZ  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   s               r/   meta_quantized_max_pool2dr  	  s     0;9
		
 $)99;!#3B++99;!{;DK{CD{{++<<'	
 	
r1   c                      t        j                   j                         k(  xr  j                     k(   fd       y )Nc                  j    d  d d ddj                          d dj                      z   S )NzExpected a tensor of dimension z and tensor.size[z] == rd   zbut got : dimension z] = rq   r   )rq   dim_sizer   r  s   r/   rQ   z check_dim_size.<locals>.<lambda>	  sP    1#6GzQVW[V\\^_ .?zfll[cNdMe
fg r1   )rB   rS   rq   r   )r  rq   r  r   s   ````r/   check_dim_sizer  	  s6    	LL

>X 6$ >	gr1   c                    d } |d|      \  }}	t        j                  t        |      dv d        t        |      dk(  r||	}}
n%t        |      dk(  r|d   |d   }}
n |d|      \  }
} |d|      \  }}t        j                  |d u xs |dk7  d	        | j                         d
k(  r| j	                  d      nd}| j	                  d      }| j	                  d      }| j	                  d      }t        ||||
d|      }t        ||	||d|      }t        j                  |       }t        | ||	|
|||dd||||||       | j                         dk(  r|||g}n||||g}t        j                  || j                  | j                  |      S )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr   r   c                      d  dS )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr5   r  s   r/   rQ   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>	      l4&(\] r1   r   r   rB   rS   r   r  rk  HWs   `   r/   unpackzmeta_avg_pool2d.<locals>.unpack	  G    H]	
 FSQACF!tr1   rd  r   r   r   c                       yNzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr5   r5   r1   r/   rQ   z!meta_avg_pool2d.<locals>.<lambda>	  rY   r1   r   r   r   r  c                       yNzdivisor must be not zeror5   r5   r1   r/   rQ   z!meta_avg_pool2d.<locals>.<lambda>	  rY   r1   r  r  r  r   r   rJ  )rB   rS   r   rq   r   pooling_output_shaper:   rl  pool2d_shape_checkrv   rJ   rn   )r   rd  r   r  r  count_include_paddivisor_overrider  kHkWdHdWpadHpadWr  r  inputHeight
inputWidthr  r  r   r   s                         r/   meta_avg_pool2dr  	  s    M;/FB	LLFy a 6{aRB	V	F1IB&)B	7+JD$	LLD 9$4$9*
  %yy{a/UZZ^QF**R.K**R.KBJ'Rr1iPL&z2tRINK//6M



		$ yy{a\;7\;?;;kk||#	 r1   c                     t        | ||||||dd|	|
||||       | j                         }|	}t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr   r   r   )r  rq   r  )r   
gradOutputr  r  r  r  r  r  r  r  r  r  r  r  
mem_formatr   nOutputPlanes                    r/   avg_pool2d_backward_shape_checkr  
  s    " 



		$ 99;DL:tTAX|<:tTAX|<:tTAX{;r1   c                    t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }	t        j                  t        |      dk(  xs t        |      dk(  xs t        |      dk(  d        t        |      dk(  r|n|d   }
t        |      dk(  r|	nt        |      dk(  r|
n|d   }t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }t        j                  |d u xs |dk7  d        |j                  }|j	                         dk(  r|d	   nd}|d
   }|d   }|d   }t        ||||
d|      }t        ||	||d|      }t        j                  |      }t        || |||	|
|||||||||       t        j                  ||j                  |j                  |      S )Nr   r   c                       y)NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr5   r5   r1   r/   rQ   z*meta_avg_pool2d_backward.<locals>.<lambda>V
  rY   r1   r   c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool2d_backward.<locals>.<lambda>\
  rY   r1   c                       y)NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr5   r5   r1   r/   rQ   z*meta_avg_pool2d_backward.<locals>.<lambda>b
  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool2d_backward.<locals>.<lambda>i
  rY   r1   r  r  r  r  r   rJ  )rB   rS   r   r   rq   r  r:   rl  r  rv   rJ   rn   )gradOutput_r   rd  r   r  r  r  r  r  r  r  r  r  r  
input_sizer  r  r  r  r  r  r  s                         r/   meta_avg_pool2d_backwardr  H
  s    
LLKA6[!1Q!6] 
QB;1$+a.B	LLFq@CK1,@Fq0@a 6{aVAYB6{a3v;!+;RB	LLG.S\Q.Y 1:Dw<1$4'!*D	LLD 9$4$9*
 J$yy{a/Z^QFR.KR.KBJ'Rr1iPL&z2tRINK,,U3J#



$ ;;kk|| 	 r1   c                 N   t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }	t        j                  | xs t        |      dv d        |s|n|d   }
|s|nt        |      dk(  r|
n|d   }|s|	nt        |      dk(  r|
n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | j                  dv d	        t        j                  | xs |dk7  d
        | j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        ||||
d|      }t        ||||d|      }t        ||	||d|      }t        | ||||	|
|||||ddd||||||dd       | j                  dk(  r| j                  ||||f      S | j                  |||||f      S )Nr   r   c                       yNzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z!meta_avg_pool3d.<locals>.<lambda>
  rY   r1   r   r   r   c                       yNzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z!meta_avg_pool3d.<locals>.<lambda>
  rY   r1   c                       yNzBavg_pool3d: padding must be a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z!meta_avg_pool3d.<locals>.<lambda>
  rY   r1   r  r  c                       yNz9non-empty 4D or 5D (batch mode) tensor expected for inputr5   r5   r1   r/   rQ   z!meta_avg_pool3d.<locals>.<lambda>
  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z!meta_avg_pool3d.<locals>.<lambda>
  rY   r1   r  r  r  r   zavg_pool3d()T)check_input_sizer  )rB   rS   r   r   r   r  pool3d_shape_checkr   )r   rd  r   r  r  r  r  kTr  r  dTr  r  padTr  r  r  nslicesitimeiheightiwidthotimeoheightowidths                           r/   meta_avg_pool3dr   
  sU    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 ZZ]FjjnGJJrNEjjnGZZ^F D"aCE"7Bb!YGG!&"dB9EF





			-2 zzQ@AAHIIr1   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  dv d	        t        j                  | xs |dk7  d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }t        ||||d|      }t        ||	||d|      }t        ||
||d|      }t        || |||	|
||||||||||||d       |j                  |j                        S )Nr  c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool3d_backward.<locals>.<lambda>
  rY   r1   r   r   r   c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool3d_backward.<locals>.<lambda>
  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool3d_backward.<locals>.<lambda>  rY   r1   r  c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool3d_backward.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z*meta_avg_pool3d_backward.<locals>.<lambda>  rY   r1   r  r  r  r   zavg_pool3d_backward())	rB   rS   r   r   r   r  avg_pool3d_backward_shape_checkr   r   )r  r   rd  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks                           r/   meta_avg_pool3d_backwardr  
  s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 jjnGJJrNEjjnGZZ^F0D"aS27Bb!YW1&"dB9U#





', ??5;;''r1   c                 ,    t        j                   j                  dk(  xs  j                  dk(   fd        j                  d d t	        |      z   }t        j                         }t        j                  | j                   j                  |      S )Nr   r  c                  "    d j                    S )Nz"Expected 3D or 4D tensor, but got r  r   s   r/   rQ   z*meta_adaptive_avg_pool2d.<locals>.<lambda>;      4TZZLA r1   r  rJ  )
rB   rS   r   r   rR   r:   rl  rv   rJ   rn   )r   output_sizer3  r   s   `   r/   meta_adaptive_avg_pool2dr  7  s|    	LL		Q($))q.A ::cr?U;%77L//5M ;;jj{{#	 r1   c                      t        j                   j                  dk(  xs  j                  dk(   fd        j                   j                  d d t        |      z         S )Nr  r  c                  "    d j                    S )Nz"Expected 4D or 5D tensor, but got r  r   s   r/   rQ   z*meta_adaptive_avg_pool3d.<locals>.<lambda>M  r  r1   r  )rB   rS   r   r   r   rR   )r   r  s   ` r/   meta_adaptive_avg_pool3dr  I  sO    	LL		Q($))q.A >>$**Sb/E+,>>??r1   c                      j                   }t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  |dk(  xs |dk(  fd       t        j                  j
                   j
                  k(   fd       t        j                  }t              rt        j                  }j                  j                        j                  |      S )	Nr   r   c                  *    d j                    d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr  )grad_outr   s   r/   rQ   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>X  s&     66>nn5EEUVWUXXdf r1   r   r  c                  "    d j                    S )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r  r   s   r/   rQ   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>]  s    TUYU_U_T`a r1   c                  <    dj                    d j                    S Nexpected dtype z! for `grad_output` but got dtype r{   )r  r   s   r/   rQ   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>a  s    /$**-Nx~~N^_ r1   r   )r   r   rB   rS   r   rJ   r   rp  rm  r   r   r   )r  r   r   r   r   s   ``  @r/   "meta__adaptive_avg_pool2d_backwardr  R  s    ==D1d^ 
MM!q f	

 
LL	TQYa 
LL

hnn$_ ++M++>>$**%((}(EEr1   c                 d    t        | d       t        j                  |t        j                        S )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrB   r   r  r  r   s     r/   "meta__adaptive_avg_pool3d_backwardr"  i  s(     &k3QRD0N0NOOr1   r  c                       j                   }t        d|      D ]/  t        j                   j	                        dkD   fd       1 y )Nr   r   c                  .      dj                    d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r  )r  r  r   s   r/   rQ   z3_adaptive_pool_empty_output_check.<locals>.<lambda>u  s/    * --8->->,??OPQsR^` r1   )r   r   rB   rS   r   )r  r  r   r   s   `` @r/   r   r   p  sG    D1d^ 
Q!#	

r1   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d} j                   dk(  r j	                  d      }|dz  } j	                  |dz
        }|\  }} j                   d	k(  r;|||f} j                  |      }	 j                  |t        j                  
      }
|	|
fS ||||f}t        j                         } j                  |      j                  |      }	 j                  |t        j                  
      j                  |      }
|	|
fS )Nr   r  c                  "    d j                    S )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r  r   s   r/   rQ   z*meta_adaptive_max_pool2d.<locals>.<lambda>      LU[[MZ r1   r   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r   r   s   r/   rQ   z*meta_adaptive_max_pool2d.<locals>.<lambda>  %    '',{{m3CA3lT r1   r   c                       y)NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r5   r5   r1   r/   rQ   z*meta_adaptive_max_pool2d.<locals>.<lambda>  rY   r1   r  r   r{   r   )r   rB   rS   r   r   r   r   r   r:   rl  r   )r   r  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r   s   `           @r/   meta_adaptive_max_pool2dr3  |  s|    ::D	LLZ 1d^ 
JJqMA	

 
LLKAU
 DEEzzQ

1	JJtax E NFFzzQFF+	ooi(//)5;;/?G|E662	33E:ooi(++-+H//)5;;/?BB' C 
 G|r1   c                 N     j                   }t        j                  |dv  fd       t         d       t        j                  j                   j                  k(   fd       t        j                        }j                  j                        j                  |      S )Nr&  c                  "    d j                    S )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r  r  s   r/   rQ   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s    ]^i^o^o]pq r1   adaptive_max_pool2d_backwardc                  <    dj                    d j                    S r  r{   )r  r   s   r/   rQ   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s!    /%++.OP[PaPaObc r1   r   )
r   rB   rS   r   rJ   r:   rl  r   r   r   )r  r   r   r   r   s   ``   r/   !meta_adaptive_max_pool2d_backwardr9    s     D	LLq
 &k3QR	LL{(((c
 //6M??5;;'***GGr1   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d}|dk(  r j	                  d      }|dz  } j	                  |      }|\  }}}|d	k(  r||||f}	n|||||f}	 j                  |	      }
 j                  |	t        j                  
      }|
|fS )Nr  c                  "    d j                    S )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r  r(  s   r/   rQ   z*meta_adaptive_max_pool3d.<locals>.<lambda>  r)  r1   r   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r  r+  s   r/   rQ   z*meta_adaptive_max_pool3d.<locals>.<lambda>  r,  r1   r   c                       y)NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r5   r5   r1   r/   rQ   z*meta_adaptive_max_pool3d.<locals>.<lambda>  rY   r1   r  r  r{   )r   rB   rS   r   r   r   r   r   )r   r  r   dimDr/  r0  osizeTr1  r2  r   r   r   r   s   `           @r/   meta_adaptive_max_pool3dr@    s    ::D	LLZ 1d^ 
JJqMA	

 
LLKAU
 DEEqy

1	JJtE(FFFqyFFF3	E666:	
//)
$Cooiu{{o;G<r1   c                 P    t        | d       |j                  |j                        S )Nadaptive_max_pool3d_backward)r   r   r   )r  r   r   s      r/   !meta_adaptive_max_pool3d_backwardrC    s"     &k3QR??5;;''r1   c                 >    |t        d      | j                  |      S )Nz:cannot repeat_interleave a meta tensor without output_size)r   r   )repeatsr  s     r/   meta_repeat_interleave_TensorrF    s%    WXX[))r1   c                     | j                   j                  sJ |j                   j                  sJ t        | j                  |j                        }| j	                  |t        | j                               S rU  )rJ   r   r#   r   r   r   )realimagr   s      r/   meta_complexrJ    s[     ::''''::''''!$**djj9I>>)+Ftzz+R>SSr1   )
fill_valuer   rK  c                d    | j                  || j                         ft        j                        S rU  )r   rq   rB   r   )r   r   rK  s      r/   nonzero_staticrM    s&     >>4,EJJ>??r1   c           
          t        j                  t              d        g }t              D ]  \  ft        j                  j                  t         j
                  t         j                  t         j                  t         j                  fv d        j                  t         j                  t         j                  fv rȉj                         }t        |      t        j                  j                  z    j                  k   fd       t        j                        D ]`  t        j                  j                      j                  z      k(   fd       |j                  |j                  d             b ]|j                         p|j                          |t        j                  t               j                  k   fd       dd lm} t%         |j&                         t               j                  k  r*j                  d        t               j                  k  r*d}d}D ]  |dk(  rd}|dk(  rd	} n d
}|sg }g }t              D ]*  \  	|j                         |j                         , t              D ]*  \  	|j                         |j                         ,  j)                  |       |g }	g }
g }t              D ]\  \  }@|r|
j                   j                  |          )|	j                   j                  |          Ht%        j                        }^  j+                  |	|z   |
z         S )Nc                       y)Nz#at least one index must be providedr5   r5   r1   r/   rQ   z#meta_index_Tensor.<locals>.<lambda>  rY   r1   c                       y)Nz?tensors used as indices must be long, int, byte or bool tensorsr5   r5   r1   r/   rQ   z#meta_index_Tensor.<locals>.<lambda>  rY   r1   c                  "    d j                    S )N)too many indices for tensor of dimension r  r   s   r/   rQ   z#meta_index_Tensor.<locals>.<lambda>  s    G		{S r1   c            	      N    dj                    d  dj                    dz    S )NzThe shape of the mask z
 at index z0 does not match the shape of the indexed tensor r  )r   r}   jr1  r   s   r/   rQ   z#meta_index_Tensor.<locals>.<lambda>  s<    "8ZPQs SJJN**U_`ade`e_f!h r1   r   c                  <    dj                    dt                dS )NrR  z (got re   )r   r   )r   r   s   r/   rQ   z#meta_index_Tensor.<locals>.<lambda>*  s!    ;DII;fSQX\NZ[\ r1   r   Fr   T)rB   rS   r7  	enumeraterJ   r   r  r  nonzeror   r   r   r   r   r   selecttorch._refs_refsr   r$   r   r   )r   r   r  rW  refsstatehas_contiguous_subspacerR  transposed_indicesbefore_shapeafter_shapereplacement_shaperq   r   r}   rT  r1  s   ``           @@@@r/   meta_index_Tensorrb  	  s   	LLg MN &(Fg& !5LL

EIIuzz5::NNY {{uzz5::66--/K""

Ndii/S uzz* 8A&&A$**QU*;;h
 MM'..A"678 e$MM% /!0 G	LLG		!\
 (4(('23G
g,
"t g,
" E# 'A: aZ} ' #'
 #!'* 	1HAu A"))%0	1 "'* 	1HAu}A"))%0	1 ||D!$ !LK#%( 2
U= ""4::c?3##DJJsO4 $U[[ 12 >>,)::[HIIr1   c                     d }d }d }|
d   r| j                  |j                               }|
d   r| j                  |j                               }|
d   r| j                  |      }|||fS )Nr   r   r   r   r   )grad_output_input_weight_bias_sizes_optr   r  rZ  
transposedr]  r\  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biass                 r/   meta_convolution_backwardrn  m  sy      1~)33FKKMB1~*44W\\^D1~(22>B 35FGGr1   c                   j                  d      }j                  d      }| j                  ||f      } t        j                  j	                         dk(  d        t        j                  j	                         dk(  d        t        j                  j                  d      j                  d      k(  fd       t        j                  j                  d      j                  d      k(  fd       t        j                  | j                  d      |k(  xr | j                  d      |k(  d	        | j                  | j                               S )
Nr   r   r   c                       yr  r5   r5   r1   r/   rQ   zmeta_addbmm.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   zmeta_addbmm.<locals>.<lambda>  rY   r1   r   c                  P    d j                  d       dj                  d       S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r  r  s   r/   rQ   zmeta_addbmm.<locals>.<lambda>  s.    J6;;WX>JZZ_`f`k`klm`n_op r1   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz#Incompatible matrix sizes for bmm (r   r?   r   r   re   r   rs  s   r/   rQ   zmeta_addbmm.<locals>.<lambda>  sQ    1&++a.1A6;;q>BR S;;q>"!FKKN#316 r1   c                       y)Nz.self tensor does not match matmul output shaper5   r5   r1   r/   rQ   zmeta_addbmm.<locals>.<lambda>  rY   r1   )r   rS  rB   rS   rq   r   )r   r  r  r  r  r#  r$  s    ``    r/   meta_addbmmrv    s    ;;q>D;;q>D;;d|$D	LL"$HI	LL"$HI	LLA&++a.(p 
LLA&++a.(	
 
LL		!51!5@ >>$))+&&r1   )
grad_scale	found_infc       	         n    | |||||fD ])  t        j                  t        t              fd       + y )Nc                       dt                S Nz'exponent must be a tensor list but got rf   ls   r/   rQ   z#meta__fused_adam_.<locals>.<lambda>      =d1gYG r1   rB   rS   r\   r   )r   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizerw  rx  r~  s                  @r/   meta__fused_adam_r    s:    & E8[/;O 
q$G	

r1   c       	             | |||||fD ])  t        j                  t        t              fd       + d } ||        ||       ||       ||       ||      fS )Nc                       dt                S r{  r|  r}  s   r/   rQ   z"meta__fused_adam.<locals>.<lambda>  r  r1   c                 R    | D cg c]  }t        j                  |       c}S c c}w r,   r3  )tensor_listr  s     r/   empty_like_listz)meta__fused_adam.<locals>.empty_like_list  s!    -89  #999s   $r  )r   r  r  r  r  r  r  r  r  r  r  r  r  rw  rx  r  r~  s                   @r/   meta__fused_adamr    ss    & E8[/;O 
q$G	

: 	!$( r1   c                 j    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  u  fd       t        j                  j                  t         j                  u fd       t        j                   j                  d      j                  d      k(   fd        j                   j                  d      j                  d      ft         j                  	      S )
Nr   c                       y)Nza must be a 2D tensorr5   r5   r1   r/   rQ   zmeta__int_mm.<locals>.<lambda>  rY   r1   c                       y)Nzb must be a 2D tensorr5   r5   r1   r/   rQ   zmeta__int_mm.<locals>.<lambda>  rY   r1   c                  "    d j                    S )Nzexpected self to be int8, got r{   )rM  s   r/   rQ   zmeta__int_mm.<locals>.<lambda>      0	: r1   c                  "    d j                    S )Nzexpected mat2 to be int8, got r{   )rN  s   r/   rQ   zmeta__int_mm.<locals>.<lambda>  r  r1   r   r   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz'Incompatible matrix sizes for _int_mm (r   r?   r   r   re   r   rM  rN  s   r/   rQ   zmeta__int_mm.<locals>.<lambda>  sM    5affQi[!&&) M66!9+Qqvvayk, r1   r{   )rB   rS   rq   rJ   r  r   r   r  r  s   ``r/   meta__int_mmr    s     
LLA>?	LLA>?	LL	5::: 
LL	5::: 
LL	q	QVVAY	
 ;;q	166!9-U[[;AAr1   c                 f    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      dz  } j                  |dz  ||dz  z  d|dz  ft         j                  	      S )
Nr   c                       yNzw must be a 2D tensorr5   r5   r1   r/   rQ   z2meta__convert_weight_to_int4pack.<locals>.<lambda>  rY   r1   c                  "    d j                    S Nzexpected w to be uint8, got r{   r  s   r/   rQ   z2meta__convert_weight_to_int4pack.<locals>.<lambda>       .qwwi8 r1   r   r      r)      r{   )rB   rS   rq   rJ   uint8r   r   r  r  inner_k_tilesr   r1  s   `   r/    meta__convert_weight_to_int4packr    s    	LLA>?	LL	5;;8 	
q	A	q	AA;;F-"$%Q		
 kk   r1   c                 J    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      } j                  ||dz  ft         j                        S )Nr   c                       yr  r5   r5   r1   r/   rQ   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  rY   r1   c                  "    d j                    S Nzexpected w to be int32, got r{   r  s   r/   rQ   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  r  r1   r   r   r{   )rB   rS   rq   rJ   r  r   r   r  r  s   `   r/   (meta__convert_weight_to_int4pack_for_cpur    s    	LLA>?	LL	5;;8 	
q	A	q	A;;	
AFkk   r1   c                 .    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d      dz   j                  	      S )
Nr   c                       yNzx must be a 2D tensorr5   r5   r1   r/   rQ   z*meta__weight_int4pack_mm.<locals>.<lambda>   rY   r1   r  c                       y)Nzw must be a 4D tensorr5   r5   r1   r/   rQ   z*meta__weight_int4pack_mm.<locals>.<lambda>!  rY   r1   c                  "    d j                    S Nz#expected x to be f32/f16/bf16, got r{   r?   s   r/   rQ   z*meta__weight_int4pack_mm.<locals>.<lambda>$      5aggY? r1   c                  "    d j                    S r  r{   r  s   r/   rQ   z*meta__weight_int4pack_mm.<locals>.<lambda>(  r  r1   r   r  r{   )
rB   rS   rq   rJ   r*  r+  r,  r  r   r   r?   r  q_group_sizeq_scale_and_zeross   ``  r/   meta__weight_int4pack_mmr    s    	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)a-qww;??r1   c                 (    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr   c                       yr  r5   r5   r1   r/   rQ   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>/  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>0  rY   r1   c                  "    d j                    S r  r{   r  s   r/   rQ   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>3  r  r1   c                  "    d j                    S r  r{   r  s   r/   rQ   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>7  r  r1   r   r{   )
rB   rS   rq   rJ   r*  r+  r,  r  r   r   r  s   ``  r/    meta__weight_int4pack_mm_for_cpur  -  s    	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)177;;;r1   c                 (    t        j                   j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                         dk(  d        t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr   c                       yr  r5   r5   r1   r/   rQ   z*meta__weight_int8pack_mm.<locals>.<lambda>>  rY   r1   c                  "    d j                    S r  r{   r  s   r/   rQ   z*meta__weight_int8pack_mm.<locals>.<lambda>A  r  r1   c                       yr  r5   r5   r1   r/   rQ   z*meta__weight_int8pack_mm.<locals>.<lambda>C  rY   r1   c                  "    d j                    S )Nzexpected w to be int8, got r{   r  s   r/   rQ   z*meta__weight_int8pack_mm.<locals>.<lambda>F  s    -aggY7 r1   r   r{   )
rB   rS   rq   rJ   r*  r+  r,  r  r   r   )r?   r  q_scaless   `` r/   meta__weight_int8pack_mmr  <  s    	LLA>?	LL	EMM5==%..AA? 
LLA>?	LL	5::7 ;;qvvay!&&)177;;;r1   c                 f    t        j                   j                         dk\   fd       t        j                  j                         dk\  fd       t        j                   j                  d      j                  d      k(   fd       t        j                  t	        j
                   j                        d        t        j                  t	        j
                  j                        d        t        j                  |dk\  d	        t        j                  d
v fd        j                  d      }j                  d      } j                  d d }j                  d d }t        t        j                  ||            }|j                  ||g        j                  |      S )Nr   c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X1 got: Dr   )x1s   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>O      CBFFH:QO r1   c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X2 got: r  r   )x2s   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>S  r  r1   r   c                  P    d j                  d       dj                  d       S )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   )r  r  s   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>W  s,    Frwwr{mSXY[Y`Y`acYdXef r1   c                       y)Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r5   r5   r1   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>[  rY   r1   c                       y)Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r5   r5   r1   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>_  rY   r1   r   c                       y)Nz)cdist only supports non-negative p valuesr5   r5   r1   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>a  rY   r1   Nr   r   c                      d  S )Nz%possible modes: None, 1, 2, but was: r5   )compute_modes   r/   rQ   z$meta_cdist_forward.<locals>.<lambda>d  s    7~F r1   r  )rB   rS   rq   r   r:   is_float_dtyperJ   r   r   broadcast_shapesextendr   )	r  r  r
  r  r1r2batch_tensor1batch_tensor2r3  s	   `` `     r/   meta_cdist_forwardr  K  sJ   	LL
AO 
LL
AO 
LL
rwwr{"f 
LLRXX&O 
LLRXX&O 
LLaLM	LL$F 
B	BHHSbMMHHSbMM..}mLMLR!<<%%r1   c                 4   |j                   d   }|j                   d   }|j                   d   }|j                   d d }|j                   d d }	t        t        j                  ||	            }
|
j	                         }|j                  ||g       t        j                  |
      }|dk(  s|dk(  s
|dk(  s|dk(  rt        j                  |      S |t        |j                         k7  r|j                  |      }t        j                  |t        j                        S )Nr   r  r   r   )r   r   rB   r  copyr  mathprod
zeros_likerS  r   r   )r  r  r  r
  cdistc1r  r  r  r  rO  tensor1_expand_sizebatch_products                r/   meta_cdist_backwardr  o  s     
"B	"B	"BHHSbMMHHSbMM 6 6}m TU.335Bx(II23M	Qw"'R1W(:##d288n,YY*+Be.E.EFFr1   c	                     t        j                  j                  t         j                  t         j                  fv fd       t        j                  j                  t         j                  t         j                  fv fd       t        j                  t        j                   j                         fd       j                  d      }	|rt        j                  |	dk\  d        |	dz  }	 j                  |	 j                  d            }
t        d      \  }}}yt        j                  ||k(  d        t        j                  j                  dk(  fd	       t        j                  j                         j                         k(  fd
       fdd fd}t              dk7  rxj                  j                  d            }j                  j                               }||k(  r"j                  |	 j                  d            }n܉j                  d      }n | |
|      }|||fv s|s!j                  j                  d            }nj                  d      }j                  |	      }j                  d   }||k(  rA|rt        j                  |dk\  d        |dz  }j                  | j                  d         }nj                  |j                               }|
|||fS )Nc                  "    d j                    S )Nz(expected indices to be long or int, got r{   )r   s   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>      :7==/J r1   c                  "    d j                    S )Nz(expected offsets to be long or int, got r{   )rC  s   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  r  r1   c                  "    d j                    S )Nz/expected weight to be floating point type, got r{   )r  s   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  s    A&,,P r1   r   r   c                       yNz1include_last_offset: numBags should be at least 1r5   r5   r1   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  rY   r1   r   c                       y)Nz@embedding_bag: per_sample_weights only supported with mode='sum'r5   r5   r1   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  rY   r1   c                  $    d j                    dS )Nz1expected per_sample_weights to be 1D tensor, got r  r  )per_sample_weightss   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  s    GHZH_H_G``ab r1   c                  N    dj                          d j                          dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (re   r   )r   r  s   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  s4    78J8P8P8R7S T66=mmo5FaI r1   c                 D     | ||      xr |j                  d      dk(  S Nr   r   r   )r   r<  r   padding_idxis_fast_path_index_selects       r/   is_fast_path_index_select_scalez;meta_embedding_bag.<locals>.is_fast_path_index_select_scale  s(    %c6;?XELLQROWXDX	
r1   c                     | j                   t        j                  k(  xs | j                   t        j                  k(  xr1 | j	                  d      dk(  xr |j	                  d      dk(  xr |dk  S Nr   r   )rJ   rB   rF   rD   r   )r   r   r  s      r/   r  z5meta_embedding_bag.<locals>.is_fast_path_index_select  sb    YY%++%@ejj)@  

1" a A%  a		
r1   c                 2    | | |||      S  | ||      S r,   r5   )r   r<  r   r  r  r  s       r/   is_fast_pathz(meta_embedding_bag.<locals>.is_fast_path  s)    23v{SS,S&+FFr1   cpuc                       yr  r5   r5   r1   r/   rQ   z$meta_embedding_bag.<locals>.<lambda>  rY   r1   )rB   rS   rJ   r   r  r:   r  r   r   r   r   r   rC  r   )r  r   rC  scale_grad_by_freqr*  sparser  include_last_offsetr  num_bagsr   MODE_SUM	MODE_MEANMODE_MAXr  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr  r  s   ```   `             @@r/   meta_embedding_bagr    s    
LL%**eii00J 
LL%**eii00J 
LLV\\*P
 ||AHMG	
 	AhA7F$)!H!Hi%HV	
 	##q(b	
 	$$&'--/9	



G 7u$&&w||A7
$$W\\^48!++Hfkk!nEK!++A.K$V-?UIx(( **7<<?;J **1-J$$X.--"8"qLO 1!++GV\\!_EK!++HMMO<K:x44r1   c                     t        | ||g| \  }}}}t        |      dk(  r|j                  |j                               }||||fS )Nr  )r  rC  r   r   )r  r   rC  r<   r   r  r	  r
  s           r/   meta_embedding_bag_forward_onlyr    sX    0B1#'1-FJ+ 7u$$$W\\^4:x44r1   c                     |r|S | j                   j                  s| j                   j                  r| j                   S |rt        j                  S | j                   S r,   )rJ   r   r   rB   r   )r   rJ   promote_int_to_longs      r/   _get_reduction_dtyper    sD    {{$$(>(>{{	zz;;r1   r{   c                    t        | |d      }t        j                  | j                  |      }t	        | ||      }| j                  ||      S )NT)r  r{   )r  r:   rW  r   rX  r   )r   rR  rZ  rJ   r  r3  s         r/   meta_nansumr    sI     (u$OLT2D+E4AL??<|?<<r1   c           	          t        j                  | j                  t        t	        | j                                           }| j                  |      S r,   )r:   rS  r   rR   r   rq   r   )r   r3  s     r/   meta_medianr    s<    77U5-.L ??<((r1   c                    t        |       dk(  rt        j                  d       t        j                  | j                  |f      }t        | ||      }| j                  |      | j                  |t        j                        fS )NrB  zmedian CUDA with indices outputr{   )	rC  r:   alert_not_deterministicrW  r   rX  r   rB   r   )r   rq   rZ  r3  s       r/   meta_median_mode_dimr    sp     5V#%%&GH


u{{SF
3C+E3@L%EJJ7 r1   c                     | S r,   r5   r   s    r/   meta_logical_not_r  )  r/  r1   c                 R   t        j                  t        |      | j                         k\  d        t        |      | j                         z
  }d|z  t	        | j
                        z   }t        t        |            D cg c]  }||   ||   z   }}| j                  |      S c c}w )Nc                       y)NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr5   r5   r1   r/   rQ   zmeta_repeat.<locals>.<lambda>2  rY   r1   r  )rB   rS   r   rq   rR   r   r   r   )r   rE  num_new_dimensionspadded_sizer   target_sizes         r/   meta_repeatr!  .  s    	LLG
"l W
2++eDJJ.??K8=c'l8KL1;q>GAJ.LKL>>+&& Ms   ?B$c                     | S r,   r5   r   s    r/   
meta_zero_r#  =  r/  r1   c                 z    t        |t        j                        r t        | j                  |j                         | S r,   )r\   rB   r   rU   r   r   r   s     r/   meta_binop_inplacer&  B  s)     %&

EKK8Kr1   c                     d }d }d } ||       r ||      rt        d       ||       r ||      st        d      t        |t        j                        r t	        | j
                  |j
                         | S )a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                     t        | t              rt        j                  | j                        S t        | t
              S r,   )r\   r   r:   r  rJ   r   r_   s    r/   is_integericz.meta_binop_inplace_alpha.<locals>.is_integerice  s.    c:&))#))44c7++r1   c                     t        | t              rt        j                  | j                        S t        | t
              S r,   )r\   r   r:   r  rJ   r   r)  s    r/   
is_floaticz,meta_binop_inplace_alpha.<locals>.is_floatick  s.    c:&''		22c9--r1   c                     t        | t              rt        j                  | j                        S t        | t
              S r,   )r\   r   r:   is_boolean_dtyperJ   r   r)  s    r/   is_booleanicz.meta_binop_inplace_alpha.<locals>.is_booleanicq  s.    c:&))#))44c8,,r1   z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r   r\   rB   r   rU   r   )r   r   r  r*  r,  r/  s         r/   meta_binop_inplace_alphar0  S  sz    $,.- Dj/k
 	

 D,u"5m
 	
 %&

EKK8Kr1   c                 8    t        | t        j                        S Nr7   r@   r   r;   )r   kwargss     r/   
meta_roundr6    s    <DD r1   c                 l    t        j                  t        j                  j                         fd       t        t         j                        r8t        j                  t        j                  j                         fd       y t        j                  t        t               fd       y )Nc                  &      dj                    S )Nz7: Expected input tensor to have an integral dtype. Got r{   )r  r   s   r/   rQ   z#shift_dtype_check.<locals>.<lambda>  s    7)RSWS]S]R^_ r1   c                  &      dj                    S )Nz6: Expected shift value to have an integral dtype. Got r{   r  rk  s   r/   rQ   z#shift_dtype_check.<locals>.<lambda>  s    wiUVYV_V_U`a r1   c                        d S )Nz): Expected shift value to be an int. Got r5   r:  s   r/   rQ   z#shift_dtype_check.<locals>.<lambda>  s    wiHN r1   )rB   rS   r:   r  rJ   r\   r   r   )r  r   rk  s   ```r/   shift_dtype_checkr<    sp    	LLtzz*_ #u||$""399-a	

 	sG$N	
r1   c                 T    t        d| |       t        | |t        j                        S )Nrshiftr3  r<  r@   r   r;   r%  s     r/   meta_rshiftsr@    )    he,e$C$K$K r1   c                 T    t        d| |       t        | |t        j                        S )Nlshiftr3  r?  r%  s     r/   meta_lshiftsrD    rA  r1   c                 8    | j                  | j                        S r,   r  r   s    r/   	meta_zerorF    s    >>$**%%r1   c                     | S r,   r5   r   rk  s     r/   
meta_fill_rI    r/  r1   c                 ,    t        j                  |       S r,   r3  rH  s     r/   	meta_fillrK        D!!r1   c                     | S r,   r5   r   s    r/   
meta_relu_rN    r/  r1   c                 :    t        | |t        j                        S r2  r4  )r   r   r  s      r/   meta__add_relurP    s     e$C$K$K r1   c                 ,    t        j                  |       S r,   r3  r   noiselowerr  rs  r   s         r/   meta_rrelu_with_noiserU    s    
 D!!r1   c                 V    t        j                  |       t        j                  |      fS r,   r3  rR  s         r/    meta_rrelu_with_noise_functionalrW    s%     D!5#3#3E#:::r1   c                     | S r,   r5   )r   rT  r  rs  r   s        r/   meta_rrelu_with_noise_rY    s	     Kr1   c                 ,    t        j                  |       S r,   r3  r   r   r   
accumulates       r/   meta_index_putr]    rL  r1   c                 F    t        | j                  |j                         | S r,   rU   r   )r   rD  values      r/   meta_masked_fill_ra    s    DJJ

3Kr1   c                     | j                  | j                               j                  t        j                  |             }|S r:  )r   r   r   r:   rl  )r   rD  r<  masked_scales       r/   meta__masked_scalerd    s<    >>$))+.1111$7 2 L r1   c                      t        j                  |j                  t         j                  t         j                  fv d        t        j                   j                  j                  k(   fd        S )Nc                       y)NzMask must be bool or uint8r5   r5   r1   r/   rQ   z&meta_masked_scatter_.<locals>.<lambda>  rY   r1   c                  <    d j                    dj                    S )NzEmasked_scatter: expected self and source to have same dtypes but got r   r{   )r   r6  s   r/   rQ   z&meta_masked_scatter_.<locals>.<lambda>  s"     **U6<<.: r1   )rB   rS   rJ   r7  r  )r   rD  r6  s   ` `r/   meta_masked_scatter_rh    sU    	LL

uzz5;;//1U 
LL

fll"	:
 Kr1   c                     t        | |      \  } }t        j                  | t        j                        }t	        |||      S r:  )r$   rB   r   r   rh  )r   rD  r6  r   s       r/   meta_masked_scatterrj    s;     "$-JD$d%2I2IJFf55r1   c                 $    | j                  |      S r,   rQ  )r   rD  r  s      r/   meta_masked_scatter_backwardrl  
  s    >>%  r1   c                     | S r,   r5   r[  s       r/   meta_index_put_rn    r/  r1   c                 8    | j                  | j                        S r,   )viewr   r   s    r/   
meta_aliasrq    s    99TZZ  r1   c                 H  	
 t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                         }|j                         |d   	|d   
|d   }d   }	||ft        j                  d   	k(  xr d   
k(  	
fd       |j	                        }|sUSt        j                  j                         dk(  d        t        j                  j                         k(  fd	       |S )
Nr   c                       yr  r5   r5   r1   r/   rQ   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rY   r1   r   r   r   c            	      .    d d d d    d d    d	S r  r5   r  s   r/   rQ   z)common_meta_baddbmm_bmm.<locals>.<lambda>(  s5    RSURV
l<?*;2l1o=NbR r1   c                       y)Nzself must be a 3D tensorr5   r5   r1   r/   rQ   z)common_meta_baddbmm_bmm.<locals>.<lambda>1  rY   r1   c                  0    d  dj                          S )Nz*Expected an input tensor shape with shape z but got shape: r   )r  self_baddbmms   r/   rQ   z)common_meta_baddbmm_bmm.<locals>.<lambda>4  s!    @M]^j^o^o^q]rs r1   )rB   rS   rq   r   r   )r  r  is_bmmrx  r&  res_rowsres_colsr   r   r!  r"  r  s      `    @@@@r/   common_meta_baddbmm_bmmr|    s	   	LL"$HI	LL"$HI;;=L;;=L	aB#AAHAHx*K	LLQ2E,q/5E"E	R k*Fl.\%%'1,.PQ;.s	

 Mr1   c                     t        | |d      S )NT)r|  )r   r  s     r/   meta_bmmr~  :  s    "4t44r1   c                 h    | |z  }| |z  }|dk7  r"t        |dk        t        |dk        k7  r|dz  }|S r  )r7  )r?   yqr.  s       r/   div_rtnr  ?  sB    	QA	AA 	Av4A;$q1u+-	QHr1   c                     t        | |z   |z   ||dz
  z  z
  dz
  |r|dz
  ndz   |      dz   }|r|dz
  |z  | |z   k\  r|dz  }|S r  )r  )	inputSize
kernelSizer  r  r   rZ  r  
outputSizes           r/   pooling_output_shape_pad_lrr  I  s     	 *q.)* 	
 'vzA/ 	
 		  Nf$	E(99!OJr1   c           	          t        j                  |dk7  d        t        j                  dk\  fd       t        j                  dz
  z  dz   dz  k  fd       t        | ||      S )Nr   c                       y)Nzstride should not be zeror5   r5   r1   r/   rQ   z&pooling_output_shape.<locals>.<lambda>e  rY   r1   c                      d  S )Nz'pad must be non-negative, but got pad: r5   )pads   r/   rQ   z&pooling_output_shape.<locals>.<lambda>f  s    %LSE#R r1   r   r   c                      d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r5   )rZ  r  r  s   r/   rQ   z&pooling_output_shape.<locals>.<lambda>i  s'    OPSu U%,nXJ@ r1   )rB   rS   r  )r  r  r  r   rZ  r  s    `` ` r/   r  r  d  ss    	LL1AB	LLRS	LLa8+a/A55	
 ':sC9 r1   c           	      >   	
  j                         }	t        j                  dkD  xr dkD  d        t        j                  |dkD  xr |dkD  d        t        j                  |dkD  xr |dkD  d         j                  d      dk7  xr  j                  d      dk7  }|t        j                  k(  r5t        j                  |dk(  xr |xr  j                  d      dk7  d	        nWt        j                  |dk(  xr  j                  d      dk7  xr |xs |dk(  xr |xr  j                  d      dk7   fd
       t        j                  dz  k\  xr dz  k\  fd       t        j                  dk\  xr dk\  
	fd       y )Nr   c                       y)NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r5   r5   r1   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  rY   r1   c                       y)Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r5   r5   r1   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  rY   r1   c                       y)Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r5   r5   r1   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  rY   r1   r   r   r  r   c                       y)NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r5   r5   r1   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  rY   r1   c                  *    d j                          S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r(  s   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  s    opupzpzp|o}~ r1   c                       d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r5   )r  r  r  r  s   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  s&     ygbT> r1   c                  .    d d  d d d d dS NzGiven input size: (r?   z). Calculated output size: (z). Output size is too smallr5   )r  r  r  r  r  r  s   r/   rQ   z$pool2d_shape_check.<locals>.<lambda>  s:    %k]!K=* N$$0><.+ O## r1   )rq   rB   rS   r   rm  )r   r  r  r  r  r  r  	dilationH	dilationWr  r  r  r  r  r   r   
valid_dimsr  s   ```  ``  `````   @r/   r  r  s  s   " 99;DL	LL
Q26U 
LL
Q26P 
LLA')a-n
 A!#:

1(:J+++AI;*;A!);Q	
 	QY<5::a=A-<* A	?j?UZZ]a-?~	
 
LL
a4+B!GtO	> 
LLq.\Q.	# 	#r1   r  r  r  r  r  r  r  pTpHpW	dilationTr  r  r  r  r  r  r  r  r  c           
      J   	
  j                   }t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  |dv  fd       t        |      D ]:  |dk(  rdk(  rt        j                   j	                        dkD   fd       < |r/t        j                  k\  xr k\  xr k\  fd	       t        j                  d
z  k\  xr d
z  
k\  xr d
z  	k\  	
fd       t        j                  dk\  xr dk\  xr dk\  fd       y )Nr   c                      d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r5   )r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  #    $fRDrd, r1   c                      d d  d S )Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r5   )r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  r  r1   c                      d d  d S )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: z, dilationW: r5   )r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  s$    #M)M)V r1   r  c                  &      dj                    S )Nz/: Expected 4D or 5D tensor for input, but got: r  )r  r   s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  s    7)J5;;-X r1   r  c                  L      dj                    dj                         dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r   r   )r  r   r   s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  s3    ) --2[[M+EJJqM?:KM r1   c                  .    d d  d d d d dS )Nzinput image (T: r  r  z ) smaller than kernel size (kT:  kH:  kW: re   r5   )r  r  r  r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  s9    "5'gYd6( C$$&4uRDbT< r1   r   c                  ,    d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r5   )r  r  r  r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  s6    $eB4uRDt72$gbTK r1   r   c                  :    d d d  d d d d d dS r  r5   )r  r  r  r  r  r  r  s   r/   rQ   z$pool3d_shape_check.<locals>.<lambda>  sI    !'!E7!G9AfX F((/y%'!F8 L'( r1   )r   rB   rS   r   r   )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   s   `````````````````````  @r/   r  r    s   0 ::D	LL
Q$26$b1f	
 
LL
Q$26$b1f	
 
LLA9)a-9IM	
 
LLX
 4[ 
19aJJqMA	
	
 RK:GrM:fl 	
 
LL
Q"6a26"q&B,	
 	
 
LL
3v{3w!|	
 	
r1   c                 j   | j                   }t        | |||||||	|
||||||||||||       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr  r   r   r   r   r  r  )r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                           r/   max_pool3d_backward_shape_checkr    s    2 ::D








+0 ;dQh8;dQh6;dQh8;dQh77D$(G47D$(E27D$(G47D$(F3r1   c                     | j                   }t        | ||||||||	|
|ddd|||||||d       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr   Tr  r   r   r  )r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                       r/   r  r  L  s    * ::D








			-2 ;dQh8;dQh6;dQh8;dQh7r1   c                    d } |d|      \  }}t        j                  t        |      dv d        t        |      dk(  r||}
}	n |d|      \  }	}
 |d|      \  }} |d|      \  }}| j                  d	      }| j                  d
      }| j                  d      }t	        j
                  |       }|t         j                  k(  r)t        j                  | j                         dk(  d        nR|t         j                  k(  r(t        j                  | j                         dv d        nt        j                  dd        t        ||||	||      }t        ||||
||      }t        | |||	|
||||||||||       |||fS )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr  c                      d  dS )Nzmax_pool2d: r  r5   r  s   r/   rQ   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>  r  r1   r   r   r  r  s   `   r/   r  z3max_pool2d_checks_and_compute_shape.<locals>.unpack  r  r1   rd  r  c                       y)NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr5   r5   r1   r/   rQ   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rY   r1   r   r   r  rZ  r  r  r   r  c                       y)NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr5   r5   r1   r/   rQ   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rY   r1   r&  c                       y)Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr5   r5   r1   r/   rQ   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rY   r1   Fc                       y)Nz?Unsupport memory format. Supports only ChannelsLast, Contiguousr5   r5   r1   r/   rQ   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>  rY   r1   )rB   rS   r   r   r:   rl  rm  rq   r   r  r  )r   rd  r   r  rZ  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  s                        r/   r  r    s    M;/FB	LLFy a 6{aRB&)B	7+JD$!*h7Iy**R.K**R.KBJ//6M+++IIK1c	
 
%11	1IIK6!O	

 	U	

 (Rr9iXL&z2tRIVK



$ k11r1   c                 |    t        |||||      \  }t        j                  j                   j                  k(   fd       |j                  fd}	 |	         |	|       t        j                        }
t        j                  j                  j                  j                  |
      S )Nc                  <    dj                    d j                    S )NzExpected dtype z  for `gradOutput` but got dtype r{   r!  s   r/   rQ   z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>  s     /$**-MkN_N_M`a r1   c                 l    t        | dz
         t        | dz
         t        | dz
         y )Nr   r   r   )r  )r  r  r   r  r  s    r/   _check_dim_sizez>meta_max_pool2d_with_indices_backward.<locals>._check_dim_size  s9    q$q,7q$q,7q$q+6r1   rJ  )
r  rB   rS   rJ   r   r:   rl  rv   r   rn   )r  r   rd  r   r  rZ  r  r   r  r  r   r  r   r  r  s   ``         @@@@r/   %meta_max_pool2d_with_indices_backwardr    s     	,k67Hi		
 
LL

k'''a
 L99D7
 K G//5M;;

jj{{#	 r1   c                    t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  |       }
| j                         dk(  r|||g}n|	|||g}t        j                  || j                  | j                  |
      t        j                  |t
        j                  | j                  |
      fS r  )
r  rq   r   r:   rl  rB   rv   rJ   rn   r   r  s               r/   meta_max_pool2d_with_indicesr    s     	,{FGXy		
  %yy{a/UZZ^QF//6Myy{a\;7\;?++<<'		
 	++<<'		
 r1   c           	         
 t        j                   j                  dv  fd        j                  }t        |dz
  |      D ]?  
t        j                   j	                  
      dkD  d j	                          d
 d       A t        j                  t              dk(  d	        t        j                  t        |      dk(  d
         j	                  d      } j	                  d       j	                  d      |dk(  r j	                  d      }nd}t        j                   j                  j                  k(  d        t        j                  j                  dk(  fd       j	                  d      }j	                  d      }j	                  d      
t        j                  ||k\  d       t        j                  ||k(  d        t        j                  
dk(  
fd       t        j                  |d   d   z   dz
  k  fd       t        j                  |d   d   z   dz
  k  fd        j                         dk(  r|||d   |d   g}	n||d   |d   g}	t        j                  |	 j                   j                        t        j                  |	t         j                   j                        fS )Nr&  c                  "    d j                    S )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r   s   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>%  s    LTYYKX r1   r   r   z^fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimenions, but got r  z emptyr   c                       y)NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr5   r5   r1   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>3  rY   r1   c                       y)NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr5   r5   r1   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>8  rY   r1   r  r  r   r  r   c                       y)Nz6Expect _random_samples to have the same dtype as inputr5   r5   r1   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>F  rY   r1   c                  "    d j                    S )Nz1Expect _random samples to have 3 dimensions got, r  )random_sampless   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>J  s    CNDWDWCXY r1   z=Expect _random_samples.size(0) no less then input batch size.c                       y)Nz<Expect _random_samples.size(1) equals to input channel size.r5   r5   r1   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>V  rY   r1   c                      d  dS )Nz/Expect _random_samples.size(2) equals to 2 got .r5   )r   s   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>X  s    #RSTRUUV!W r1   c                      dd    d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r5   )input_heightrd  s   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>\  s    7A7GGno{n|} r1   c                      dd    d  S )Nz$fractional_max_pool2d: kernel width r   z& is too large relative to input width r5   )input_widthrd  s   r/   rQ   z,meta_fractional_max_pool2d.<locals>.<lambda>`  s    6{1~6FFlmxlyz r1   rJ   rn   )rB   rS   r   r   r   r   rJ   rq   rv   rn   r   )r   rd  r  r  r   input_channelsinput_batchr   cr   r   r  r  s   `` `      @@@r/   meta_fractional_max_pool2dr  !  s   	LL		VX 99D4!8T" 
IIaL166:iik]BRSTRUU[]	

 
LLKA	2
 
LLKA	2 YYr]N99R=L))B-Kqyiil	LL

n***H 
LLq Y
 	AAAAAA	LL	[G 
LL	^N 
LLaWX	LLAQ'!+|;} 
LLAQ'!+{:z
 xxzQ^[^[^LAA? 	**;;	

 	++;;	
 r1   c                 |   t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | xs t        |      dv d        |s|n|d   }	|s|nt        |      dk(  r|	n|d   }
|s|nt        |      dk(  r|	n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | j                  d	v d
        | j                  dk(  r| j	                  d      nd}| j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        ||||	||      }t        ||||
||      }t        ||||||      }t        | |||||	|
|||||||||||||d       | j                  dk(  xr& t        j                  |       t         j                  k(  }| j                  dk(  rK| j                  d      }|j                          xr  |j                  t         j                        }||||f}n|||||f}| j                  |      }| j                  |t         j                        }|r@|j                  t         j                        }|j                  t         j                        }||fS )Nr  c                       yNzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z.meta_max_pool3d_with_indices.<locals>.<lambda>  rY   r1   r   r   r   c                       yNzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z.meta_max_pool3d_with_indices.<locals>.<lambda>  rY   r1   c                       yNzImax_pool3d: padding must either be a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z.meta_max_pool3d_with_indices.<locals>.<lambda>  rY   r1   c                       yNzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr5   r5   r1   r/   rQ   z.meta_max_pool3d_with_indices.<locals>.<lambda>  rY   r1   r  c                       yr  r5   r5   r1   r/   rQ   z.meta_max_pool3d_with_indices.<locals>.<lambda>  rY   r1   r  r  r  r  r   zmax_pool3d_with_indices()r  r   r{   )rB   rS   r   r   r   r  r  r:   rl  r  ra  r.  r   r   r   )r   rd  r   r  rZ  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rm  input_channels_last_checkr   r   r   s                                  r/   meta_max_pool3d_with_indicesr  v  sJ    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
  %zzQUZZ^AFjjnGJJrNEjjnGZZ^F BIyIE"7BB	9MG!&"b"iKF








#+2 	

aXE77>%BXBXX  zzQ$)OOA$6!)7799
'5500 6 
 	
 eWf5	WeWf=	
//)
$Cooiu{{o;Gff5#9#9f:**5+A+A*B<r1   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  d	v d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        || ||||	|
|||||||||||||||d       |j                  dk(  xr& t        j                  |      t         j                  k(  }|j                  dk(  rD|j                  d      }|j                          xr  |j                  t         j                        }|j                  |j                        }|r |j                  t         j                        }|S )Nr  c                       yr  r5   r5   r1   r/   rQ   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rY   r1   r   r   r   c                       yr  r5   r5   r1   r/   rQ   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rY   r1   r  c                       yr  r5   r5   r1   r/   rQ   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rY   r1   r  r  r  r   z"max_pool3d_with_indices_backward()r  r  r   )rB   rS   r   r   r   r  r:   rl  r  ra  r.  r   r   r   )r  r   rd  r   r  rZ  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rm  r  r  s                                 r/   %meta_max_pool3d_with_indices_backwardr    s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
 jjnGJJrNEjjnGZZ^FR Er"Gb!F#








,/6 	

aXE77>%BXBXX  zzQ$)OOA$6!)7799
'5500 6 
 	 -J]]1G1G]H
r1   gridc                 z    t        j                   j                  j                  k(   fd       t        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       t        j                   j
                  d   j
                  d   k(   fd       t        j                  j
                  d    j                  dz
  k(   fd       t        d j                        D ],  t        j                   j
                     dkD   fd       . y )	Nc                  <    dj                    d j                    S )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on rv  r  r   s   r/   rQ   z+check_grid_sampler_common.<locals>.<lambda>I  s'    \\N"24;;-A r1   c                  <    dj                    d j                    S )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )rm   r  s   r/   rQ   z+check_grid_sampler_common.<locals>.<lambda>P  s&    nT[[MC r1   r   c                  <    dj                    d j                    S )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r  r  s   r/   rQ   z+check_grid_sampler_common.<locals>.<lambda>W  s'      %},A$**O r1   r   r   c                  B    dj                   dz
   d j                   S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   r   r  s   r/   rQ   z+check_grid_sampler_common.<locals>.<lambda>^  s,    9%**q.9I J226**? r1   c                  *    dj                    d  dS )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r  r+  s   r/   rQ   z+check_grid_sampler_common.<locals>.<lambda>g  r,  r1   )rB   rS   rn   rm   r}  r   r   r   )r   r  r   s   ``@r/   check_grid_sampler_commonr  F  s    	LL#	
 
LL%F$++*F	
 
LLA$**Q-'	
 
LL

2%**q.(	
 1ejj! 
KKNQ	

r1   c                       e Zd ZdZdZdZy)GridSamplerInterpolationr   r   r   N)rg   
__module____qualname__BILINEARNEARESTBICUBICr5   r1   r/   r  r  n  s    HGGr1   r  interpolation_modec                     t        j                   j                  dk(  xr  j                  j                  k(   fd       t        j                   j                  dk(  xr |t        j                  j
                  k(   d        y )Nr  c                  <    dj                    d j                    S )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r  r  s   r/   rQ   z'check_grid_sampler_3d.<locals>.<lambda>w  s&    449KK=#DJJ<1 r1   c                       y)Nz<grid_sampler(): bicubic interpolation only supports 4D inputr5   r5   r1   r/   rQ   z'check_grid_sampler_3d.<locals>.<lambda>  rY   r1   )rB   rS   r   r  r  r`  )r   r  r  s   `` r/   check_grid_sampler_3dr  t  sp    	LL

a3EJJ$))3	
 
LLJJ!O M"&>&F&F&L&LL	
 	Or1   c                     |d   }|r&t        j                  |t         j                        }nd }t        j                  |t         j                        }	||	fS Nr   r   )rB   r  r   r   
r  r   r  r  padding_modealign_cornersrj  input_requires_gradr  	grad_grids
             r/   grid_sampler_2d_backward_metar    sQ     &a.%%e5;R;RS

  U5L5LMI	""r1   c                     t        | |       t        | ||       | j                  d   }| j                  d   }|j                  d   }|j                  d   }|j                  d   }	| j                  |||||	f      S )Nr   r   r   r   )r  r  r   r   )
r   r  r  r  r  rL  Cout_Dout_Hout_Ws
             r/   grid_sampler_3dr    sv     eT*%'9:AAAAJJqMEJJqMEJJqME??Aq%677r1   r  c                     t        ||       t        |||       |d   }|r&t        j                  |t        j                        }nd }t        j
                  |t        j                        }	||	fS r  )r  r  rB   r  r  r   r  s
             r/   grid_sampler_3d_backwardr    sm     eT*%'9:%a.%%!?!?

 
  U5S5STIy  r1   c                     |j                  dd       }|st        j                  |      }||d<   t        j                  | g|i |S )NrJ   )rI   r:   	get_dtyperB   rv   )r   rK  r<   r5  rJ   s        r/   fullr
    sE    JJw%E
+F7O;;t-d-f--r1   c                 N   |t         j                  k(  rt        j                  |d u d        t        j                  d|| j                  n|||| j
                  n||      }| j                  r>|j                  | j                         | j                         | j                                n/|j                  | j                         | j                         d       |j                  d       |S t        j                  j                  | |||||      }|j!                  d       |S )Nc                       y)Nz9memory format option is only supported by strided tensorsr5   r5   r1   r/   rQ   zzeros_like.<locals>.<lambda>  rY   r1   r   r   Trt  )rB   
sparse_coorS   rv   rJ   rn   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimrq   _coalesced_r&   r   r   fill_)r   rJ   rm   rn   ro   r   rk  s          r/   r  r    s     !!!T!O	

 kk %$**5"(.4;;f!
 >>((		T__.0@ ((dhhj!D

//
!
!# " C IIaLJr1   c                     ddl m}  j                         }t        j                  |dk7  d        dk\  rn|z    j                        }t        j                   | |kD        xs  ||k\          fd       dk\  rn|z   t         j                               }t         j                               } j                         |   z  z   }|= |=  j                  |||      S )Nr   guard_size_obliviousc                       y)Nz-select() cannot be applied to a 0-dim tensor.r5   r5   r1   r/   rQ   zmeta_select.<locals>.<lambda>  rY   r1   c                  6    d dj                          d  S )Nzselect(): index z! out of range for tensor of size z at dimension r   rq   r}   r   s   r/   rQ   zmeta_select.<locals>.<lambda>  s%    "5')J99;-~cU, r1   )
r   r  rq   rB   r   r   r   r   r   r   )	r   rq   r}   r  r   r   new_sizer  new_storage_offsets	   ```      r/   meta_selectr    s    J88:D		?
 #sTzC99S>D	 %$/V3GQU3V	
	,	 aZEUT\EDIIK Hdkkm$J,,.C1HH3??8Z1CDDr1   c                 ,    t        j                  |       S r,   r:   clone_preserve_strides)r   r   rq   r}   s       r/   meta_select_scatterr         ''--r1   c                 ,    t        j                  |       S r,   r  )r   r   rq   ri   rh   steps         r/   meta_slice_scatterr$  !  r!  r1   dim_post_exprwrap_scalarc                 v    |dk  r|sJ d}| }|dz
  }| |k  s| |kD  rJ d|  d| d| d       | dk  r| |z  } | S )Nr   r   zdim z out of bounds (rd   re   r5   )rq   r%  r&  r|  r}  s        r/   r   r   '  sm    {.C
!
Cc	S3YR4u4DSEC5PQ)RR'
Qw}Jr1   c                 J    | j                         dk(  rdS | j                  |   S r  r  )r  rq   s     r/   ensure_nonempty_sizer)  3  s!    11.!''#,.r1   c                 :    t         j                         d      }t        j                         d      }t        j                  ||k(  d        t	        |      D ];  k7  s	t        j                  t              t               k   fd       = y )Nr   c                       y)NzDIndex tensor must have the same number of dimensions as input tensorr5   r5   r1   r/   rQ   z$gather_shape_check.<locals>.<lambda>=  rY   r1   c                  N    d dj                    dj                    d  z   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r  )rq   r   r}   r   s   r/   rQ   z$gather_shape_check.<locals>.<lambda>C  s7    ;A3>Nu{{m\/

|;QRUQVWX r1   )r}  rq   rB   rS   r   r)  )r   rq   r}   	self_dims
index_dimsr   s   ```  @r/   gather_shape_checkr1  8  s    DHHJ"IUYY[!$J	LLZV 9 8LL$UA.2FtQ2OOXr1   c                 2   ddl m} t        || j                               } |j	                         dk(        }|sAt        j                  j                  t
        j                  k(  fd       t        | |       | j                  j                        S )Nr   r  c                  "    d j                    S )Nz2gather(): Expected dtype int64 for index, but got r{   r|   s   r/   rQ   zmeta_gather.<locals>.<lambda>Q  s    HV r1   )r   r  r   rq   r   rB   rS   rJ   r   r1  r   r   )r   rq   r}   sparse_gradr  wrapped_dimis_index_emptys     `    r/   meta_gatherr7  H  sp    J dhhj1K)%++-1*<=NKK5::%V	
 	4e4>>%++&&r1   c                     |r6| dk(  ry| dk(  ry| dk(  ry| dk(  ry| d	k(  ry
t        j                  dd        y | dk(  ry| dk(  ryt        j                  dd        y )Nsum
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                       y)Nz=reduce argument must be either sum, prod, mean, amax or amin.r5   r5   r1   r/   rQ   z#get_operator_enum.<locals>.<lambda>f  rY   r1   addmultiplyc                       y)Nz/reduce argument must be either add or multiply.r5   r5   r1   r/   rQ   z#get_operator_enum.<locals>.<lambda>n  rY   r1   r0  )reduce_use_new_optionss     r/   get_operator_enumrH  X  s{    e$ ##S	
 	e
"$UUVr1   c                     ddl m}  ||j                         dk7        r4t        j                  |j
                  t        j                  k(   fd       |1t        j                  |j
                  |j
                  k(   fd       y y )Nr   r  c                        dS )Nz"(): Expected dtype int64 for indexr5   method_names   r/   rQ   z,scatter_gather_dtype_check.<locals>.<lambda>y  s    {m#EF r1   c                        dS )Nz0(): Expected self.dtype to be equal to src.dtyper5   rK  s   r/   rQ   z,scatter_gather_dtype_check.<locals>.<lambda>  s    {m#ST r1   )r   r  r   rB   rS   rJ   r   )rL  r   r}   src_optr  s   `    r/   scatter_gather_dtype_checkrO  s  sd    JEKKMQ./KK5::%F	

 JJ'--'T	
 r1   c                     t        | d      S r   )r}  r   s    r/   ensure_nonempty_dimrQ    s    sA;r1   c                     ddl m}  |j                         dk(        ry t        j                  t         j                               t        j                               k(  d        d}t         j                               }t        |      D ]'  }t        |      }|k(  r|t         |      kD  s%d} n |s1/t        |      D ]!  }t        |      }|t        |      kD  sd} n ft        j                  t         j                               t        j                               k(  d        t        j                  |  fd       y t        j                  |  fd       y )	Nr   r  c                       yNzCIndex tensor must have the same number of dimensions as self tensorr5   r5   r1   r/   rQ   z%scatter_shape_check.<locals>.<lambda>  rY   r1   FTc                       yrT  r5   r5   r1   r/   rQ   z%scatter_shape_check.<locals>.<lambda>  rY   r1   c                  b    dj                    dj                    d  dj                    z   S )NExpected index r-  r.  z and to be no larger than src r  )rq   r}   r   rN  s   r/   rQ   z%scatter_shape_check.<locals>.<lambda>  s8    oekk]2Mdjj\Z&se+I'--YZ r1   c                  H    dj                    dj                    d  z   S )NrW  r-  r.  r  r  s   r/   rQ   z%scatter_shape_check.<locals>.<lambda>  s,    oekk]2Mdjj\Z&se,- r1   )	r   r  r   rB   rS   rQ  rq   r   r)  )	r   rq   r}   rN  r  is_wrong_shaper/  r   index_d_sizes	   ````     r/   scatter_shape_checkr[    sP   JEKKMQ./	LLDHHJ'+>uyy{+KKU
 N#DHHJ/I 9 +E158.tQ77!N g1y! 	A/q9L27A>>!%		 
+/B599;/OOY	
 	Z	
 	-	
r1   c                     t        || j                               }t        d| ||       t        | |||       |t	        ||       y y )Nscatter)r   rq   rO  r[  rH  )r   rq   r}   r   rF  rG  r5  s          r/   scatter_meta_implr^    sE     dhhj1Ky$s;k5#6'?3 r1   c                 V    t        | |||d       | j                  | j                        S NrC  r^  r   r   r   rq   r}   r   s       r/   meta_scatter_addrc    s%    dCU3>>$**%%r1   c                 $    t        | |||d       | S r`  r^  rb  s       r/   meta_scatter_add_rf    s    dCU3Kr1   c                     t        |t        j                        r|nd }t        | ||||       | j	                  | j
                        S r,   )r\   rB   r   r^  r   r   r   rq   r}   src_or_valuer7  r   s         r/   meta_scatterrj    s;     %\5<<@,dCdCV4>>$**%%r1   c                 `    t        |t        j                        r|nd }t        | ||||       | S r,   )r\   rB   r   r^  rh  s         r/   meta_scatter_rl    s-     %\5<<@,dCdCV4Kr1   queryr   r`  	dropout_p	is_causalreturn_debug_maskr<  c                 D   | j                  d      }| j                  d      }| j                  d      }	| j                  d      }
|j                  d      }| j                  dd      }t        j                  |      j                  dd      }t        j                  |||	ft        j
                  | j                        }|ra|
dkD  rdnd}t        j                  |	|z        }|dk  rd}n|dk  rd}t        j                  |||	|f| j                  | j                        }n,t        j                  d| j                  | j                        }||d d |	|t        j                  d	t        j                  d
      t        j                  d	t        j                  d
      |f	S )Nr   r   r   r   r  @         r5   rl   )r   r  rB   r   rv   rF   rn   r  ceilrJ   r   )rm  r   r`  rn  ro  rp  r<  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_masks                     r/   (meta__scaled_dot_product_flash_attentionr    si    AJ

1IAzz!}H!ooa#G  )33Aq9I	Y 23kk||I %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 	Bejj8Bejj8
 
r1   	attn_biascompute_log_sumexpc	           	      B   | j                  d      }	| j                  d      }
| j                  d      }|j                  d      }| j                  d      }|j                  d      }t        j                  |	|
||f| j                  | j                        }t        j                  |	|
|ft        j
                  | j                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||d d ||||d f	S )Nr   r   r   r   r  r5   rl   )r   rB   rv   rJ   rn   rF   r   )rm  r   r`  r  r  rn  ro  rp  r<  r  r  S_QS_KVD_QKD_Vrk  
logsum_expseedoffsets                      r/   (meta__scaled_dot_product_cudnn_attentionr  $  s     	

1A

1A
**Q-C88A;D::b>D
**R.C
++q!S#&ekk%,,
OC	
Askk||J ;;rF;D[[5::f=F 	
 
r1   r  r|  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                 J   t        j                  |j                  dd            j                  dd      }t        j                  |j                  dd            j                  dd      }t        j                  |j                  dd            j                  dd      }|||fS r  )rB   r   r  )r  rm  r   r`  r   r|  r  r  r  r  rn  ro  r  r  r<  grad_qgrad_kgrad_vs                     r/   'meta__scaled_dot_product_flash_backwardr  O  s    , eooa34>>q!DFcmmAq12<<QBFeooa34>>q!DF66!!r1   	attn_maskc                 B   | j                  d      }| j                  d      }| j                  d      }	| j                  d      }
t        j                  |       }t        j                  ||	|ft        j                  | j
                        j                  dd      }||fS )Nr   r   r   r   r  )r   rB   r   rv   rF   rn   r  )rm  r   r`  rn  ro  r  r<  r   rv  rw  rx  r{  r|  s                r/   0meta__scaled_dot_product_flash_attention_for_cpur  k  s     AJ

1IAzz!}H  'I	

 kk|| i1o  	 r1   c
                    |j                  d      }
|j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  |
|||fd|j                  |j                        }t        j                  |
|||fd|j                  |j                        }t        j                  |
|||fd|j                  |j                        }|||fS )Nr   r   r   r   r   r   r   r   r  )r   rB   empty_permutedrJ   rn   )r  rm  r   r`  r   r|  rn  ro  r  r<  r   rv  rx  len_qlen_kr  r  r  s                     r/   9meta__scaled_dot_product_flash_attention_for_cpu_backwardr    s    & AJ

1Izz!}HJJqMEHHQKE!!	Yx0kk||	F !!	Yx0iizz	F !!	Yx0kk||	F 66!!r1   c                    | j                  dd      } |j                  dd      }|j                  dd      }| j                  d      }| j                  d      }	|j                  d      }
| j                  d      }| j                  d      }|j                  d      }t        j                  ||	||| j                  | j
                        }|rt        j                  |	dz        dz  nd}t        j                  |||ft        j                  | j
                        }|j                  dd      }t        j                  dt        j                  d	      }t        j                  dt        j                  d	      }||||fS )
Nr   r   r   r  r   r  r  r5   rl   )
r  r   rB   rv   rJ   rn   r  ru  rF   r   )rm  r   r`  r  r  rn  ro  r<  r  rw  rL  rv  KKvrk  logsumexp_dimr  r  r  s                      r/   ,meta__scaled_dot_product_efficient_attentionr    s2    OOAq!E
--1
COOAq!E

1A

1AA

2I

2A	BB
++aIrU\\
RC.@DIIa"f%*aM	
I}%kk||J --1
C ;;rF;D[[5::f=F
D&((r1   grad_input_maskc                    |j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }d }|~|
d   ry|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j                        }|d	d |f   }||||fS )
Nr   r   r   r   r  r  r   r)  .)r   rB   r  rJ   rn   r   rv   )r  rm  r   r`  r  r   r|  r  r  rn  r  ro  r<  r   rv  r  rx  
head_dim_vr  r  r  r  	grad_biaslastDimlastDimAligned	new_sizess                             r/   +meta__scaled_dot_product_efficient_backwardr    s{   ( AJ

1IJJqMEzz!}HAJHHQKE!!	Yx0kk||	F !!	Yx0iizz	F !!	Yz2kk||	F I!3..$$+bLA$57R<'TV,;V)*	&	"KKY__Y5E5E
	 c8G8m,	669,,r1   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS r,   r3  )r  rm  r   r`  r   r|  r  r  r  r  r  r  r  rn  ro  r<  r  r  r  s                      r/   'meta__scaled_dot_product_cudnn_backwardr  !  sA    . e$Fc"Fe$F66!!r1   window_size_leftwindow_size_right	seqused_kalibi_slopesc                 2   || j                  d      n|j                         dz
  }|| j                  d      n|}||j                  d      n|}| j                  d      }| j                  d      }t        j                  |       }t        j                  |||ft        j
                  | j                        }|	ra|dkD  rdnd}t        j                  ||z        }|dk  rd}n|dk  rd}t        j                  ||||f| j                  | j                        }n,t        j                  d| j                  | j                        }||t        j                  d	t        j                  d
      t        j                  d	t        j                  d
      |fS )Nr   r   r  r   r  rr  rs  rt  r5   rl   )r   r   rB   r   rv   rF   rn   r  ru  rJ   r   )rm  r   r`  r  r  r  r  rn  ro  rp  r<  r  r  r  r  r   rw  ry  rv  rx  r{  r|  r}  r~  r  s                            r/   meta__flash_attention_forwardr  >  sc   4 #,"3A9JQ9NJ*3*;A(1(9!u

2Izz"~H   'I	Y 23kk||I %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 	Bejj8Bejj8 r1   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS r,   r3  )r  rm  r   r`  r   r|  r  r  r  r  rn  ro  r  r  r<  r  r  
grad_querygrad_key
grad_values                       r/   meta__flash_attention_backwardr    sA    0 !!%(J$H!!%(Jx++r1   cu_seqlens_qcu_seqlens_kmax_seqlen_qr~  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                    | j                  d      }| j                  d      }|j                  d      }| j                  d      }| j                  d      }|j                  d      }t        j                  ||||| j                  | j                        }||j                  d      dz
  n|}|}||J |}||n|}|
rt        j                  |dz        dz  nd}t        j                  |||ft        j                  | j                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||||||fS )	Nr   r   r  r   r  r  r5   rl   )	r   rB   rv   rJ   rn   r  ru  rF   r   )rm  r   r`  r  r  r  r  r~  rn  r  r  r<  r  r  r  r  rw  rL  rv  r  r  rk  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr  r  r  r  s                                r/   !meta__efficient_attention_forwardr    sF   , 	

1A

1AA

2I

2A	BB
++aIrU\\
RC7C7O,++A.2VW'''**6*B,4F		%*+b0A  	i7kk||J ;;rF;D[[5::f=F
D&*=?RRRr1   bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                    |rt        j                  |j                  d   |j                  d   k(  d        t        j                  |j                  d   |j                  d   k(  d        t        j                  g |j                  dd d|j                  d   |j                  d   |j                  |j
                        }|j                  d	d      }|j                  d	d      }|j                  d	d
      }n?t        j                  |      }t        j                  |      }t        j                  |      }|z|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j
                        }|dd |f   }n!t        j                  d|j
                        }||||fS )Nr   c                       y)Nz,seqlen must match for `shared_storage_dqdkdvr5   r5   r1   r/   rQ   z4meta__efficient_attention_backward.<locals>.<lambda>  rY   r1   r   c                       y)Nz3embedding dim must match for `shared_storage_dqdkdvr5   r5   r1   r/   rQ   z4meta__efficient_attention_backward.<locals>.<lambda>  rY   r1   r   r  r   r  r  r   r)  .r5   rv  )
rB   rS   r   rv   rJ   rn   rX  r   r   r   )r  rm  r   r`  r  r  r  r  r~  r|  rn  r  r  r  r  r<  r  r  chunkr  r  r  r  r  r  r  s                             r/   "meta__efficient_attention_backwardr    s   2 KKNciil*B	
 	KKNciil*I	
 Eekk!BEEEKKOEU[[_E++<<

 \\"a(
<<A&\\"a(
%%e,
##C(%%e,
))B-$+bLA$57R<'TV,;V%	&	"KK	DKKP	c8G8m,	KK5<<8	xY66r1   scale_ascale_bscale_resultuse_fast_accumc                     d }t        j                   j                         dk(  xr j                         dk(   fd       t        j                   | j                        xr  |j                         fd       t	               dk(  rhd }	d }
d }t        j                   |	 j                               xs  |        fd	       t        j                   |
j                               xs  |      fd
       t        j                   j                  d      dz  dk(   fd       t        j                  j                  d      dz  dk(  xr j                  d      dz  dk(  fd       t        j                  j                  t         j                  k(  xr j                  t         j                  k(  d         j                  \  }j                  d      j                         dk(  rj                         dk(  rnt        j                  j                         dk(  xr j                         dk(  fd       j                  d      k(  rtj                  d      dk(  r`j                  d      dk(  rLj                  d      k(  r8t        j                  j                         xr j                         d        nt        j                  dfd       ||n j                  }t        j                   j                  d      j                  d      | j                        S )Nc                     | t         j                  t         j                  t         j                  t         j                  fv S r,   )rB   r-  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzr{   s    r/   is_fp8_typez#meta_scaled_mm.<locals>.is_fp8_type  s8    !!!!	
 
 	
r1   r   c                  L    dj                          d j                          S )Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   r  r   s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>$  s'    7
|CSTXT\T\T^S_` r1   c                  <    dj                    d j                    S )Nz8Expected both inputs to be fp8 types but got self.dtype=z and mat2.dtype=r{   r  s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>(  s"    J4::,Vfgkgqgqfrs r1   rB  c                 ,    | d   | d   kD  xr | d   dk(  S r  r5   r  s    r/   is_row_majorz$meta_scaled_mm.<locals>.is_row_major-  s"    !9vay(;VAY!^;r1   c                 &    | d   dk(  xr | d   dkD  S r  r5   r  s    r/   is_col_majorz$meta_scaled_mm.<locals>.is_col_major0  s    !9>3fQi!m3r1   c                 V    | j                  d      dk(  xs | j                  d      dk(  S r  r   )	tensor_2ds    r/   has_zero_dimz$meta_scaled_mm.<locals>.has_zero_dim3  s)    >>!$)CY^^A->!-CCr1   c                  *    d j                          S )Nz#self must be row_major, got stride r  r   s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>8      9$++-I r1   c                  *    d j                          S )Nz#mat2 must be col_major, got stride r  r  s   r/   rQ   z meta_scaled_mm.<locals>.<lambda><  r  r1   r   r)  r   c                  ,    d j                  d       S )NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r   r   r   s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>@  s    XY]YbYbcdYeXfg r1   c                  "    d j                    S )Nz>Expected both dimensions of mat2 to be divisble by 16 but got r  r  s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>D  s    TUYU_U_T`a r1   c                       y)Nz6Both scale_a and scale_b must be float (fp32) tensors.r5   r5   r1   r/   rQ   z meta_scaled_mm.<locals>.<lambda>J  rY   r1   c                  L    d j                         dj                         S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   )r  r  s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>U  s,    gY`YdYdYfXhh|nunynyn{m}~ r1   c                       y)Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r5   r5   r1   r/   rQ   z meta_scaled_mm.<locals>.<lambda>a  rY   r1   Fc                      d  d dj                  d       dj                  d       dj                  d       dj                  d       dS )	Nz}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (z, 1), scale_b should be (1, z). Got scale_a.size()=(r   rd   r   z) and scale_b.size()=(re   r   )r2  r   r  r  s   r/   rQ   z meta_scaled_mm.<locals>.<lambda>g  sk    CCD#Eabcad e//6||A.?r',,q/AR S//6||A.?r',,q/ARRS	U r1   r  )rB   rS   rq   rJ   rC  r   r   r*  r   r   r.  rv   rn   )r   r  r  r  r  r  r  r  r  r  r  r  r1  
_out_dtyper2  r   s   ````          @@r/   meta_scaled_mmr    s   
 
LL
a+DHHJ!O` 
LLDJJ;K

$;s
 4F"	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=a	
 	MMU]]*Mw}}/ML	
 zz1IIaL==?aGMMOq$8 LL"9w{{}'9~ Q1$LLOq(LLOq(LLOq( ))+G0E0E0G^ 	 (3J;;tyy|TYYq\DKKXXr1   c                 Z    t        | ||||d       | j                  | j                        S NT)rG  ra  r   rq   r}   r   r7  r5  s         r/   meta_scatter_reduce_twor  t  s)     dCVTJ>>$**%%r1   c                 (    t        | ||||d       | S r  re  r  s         r/   meta_scatter_reduce__twor  {  s    dCVTJKr1   c                t    t        j                  d j                         cxk  xr dk  nc  fd        j                         dk(  r0t        j                  |t         j                   j
                        S t        j                   j                  d      |t         j                   j
                        S )Nr   r   c                  *    d j                          S )Nz@The probabilty distributions dimensions must be 1 or 2, but got r   r(  s   r/   rQ   z"meta_multinomial.<locals>.<lambda>  s    RSXS\S\S^R_` r1   r   r  )rB   rS   rq   rv   r   rn   r   )r   num_samplesreplacementr   s   `   r/   meta_multinomialr    s|     
LL	EIIK1` yy{a{{;ejjNN;;

1{%**U\\ r1   c                 "    d}| D ]  }||z  }	 |S r   r5   )vsr.  vs      r/   multiply_integersr    s$    	A 	QHr1   c                 L    t        j                  t              k(  fd       dz   t        j                  t               k(   fd       t        j                  t        d  dd  D              xr t        d D               fd        d d \  }}||gS )Nc                  &    d  dt               S )Nz%It is expected output_size equals to , but got size r  )num_spatial_dimsr  s   r/   rQ   z'upsample_common_check.<locals>.<lambda>  s    78H7IY\]hYiXjk r1   r   c                  &    d  dt               S )Nz$It is expected input_size equals to r  r  )expected_input_dimsr  s   r/   rQ   z'upsample_common_check.<locals>.<lambda>  s    67J6K?[^_i[jZkl r1   c              3   &   K   | ]	  }|d kD    ywr   Nr5   r^   rr  s     r/   r`   z(upsample_common_check.<locals>.<genexpr>  s     *aAE*   c              3   &   K   | ]	  }|d kD    ywr  r5   r  s     r/   r`   z(upsample_common_check.<locals>.<genexpr>  s     2NQ1q52Nr  c                      d  d S )NzDInput and output sizes should be greater than 0, but got input size z and output size r5   )r  r  s   r/   rQ   z'upsample_common_check.<locals>.<lambda>  s      \!2;-A r1   )rB   rS   r   rb  )r  r  r   r  channelsr  s   ```  @r/   upsample_common_checkr
    s    	LLK,,k +Q.	LLJ..l
 
LL*:ab>**Ns2N+2N/N	A ""1~FHH+{++r1   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r   c                  *    d j                          S )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r(  s   r/   rQ   z$upsample_nearest1d.<locals>.<lambda>      PQVQ[Q[Q]P^_ r1   r   r   
rB   rS   r   r  r   r
  r   r   r:   rl  )r   r  scalesfull_output_sizes   `   r/   upsample_nearest1dr         
LLA/

QR0@A_ -

kA ??+,//11%8 0  r1   c                     t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      }t        j                         } j                  \  }}}} j                  j                  dk(  r|dk  rt         j                  }|j                  |      }|S )	Nr   r   c                  *    d j                          S Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r(  s   r/   rQ   z$upsample_nearest2d.<locals>.<lambda>  r  r1   r   r  rB  r  r   )rB   rS   r   r  r   r
  r   r:   rl  r   rn   rf   r   r   )	r   r  scales_hscales_wr  r   r   r=   
n_channelss	   `        r/   upsample_nearest2dr    s     
LLA/

QR0@A_ -

kA __-.F //6M  ++Az1a||F"zA~//];FMr1   r  r  r  r  c                 X    t        ||d      t        j                   j                  dk(   fd       t	        d      D ]2  t        j                   j                           k(   fd       4  j                  |      j                  t        j                               S )Nr   r  r  c                  "    d j                    S )NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r6  s   r/   rQ   z-upsample_nearest2d_backward.<locals>.<lambda>  s    XYdYiYiXjk r1   c            
      D    d d     d dj                         S )NzCExpected grad_output to have the same shape as output; output.size(z) = z but got grad_output.size(r   )r  r  r   s   r/   rQ   z-upsample_nearest2d_backward.<locals>.<lambda>  s>      !s$'7':&;,QCtK4D4DQ4G3HJ r1   r   )
r
  rB   rS   r   r   r   r   r   r:   rl  )r  r  r  r  r  r  r   s   `    @@r/   upsample_nearest2d_backwardr    s     -K! 
LLAk 1X 
Q#3A#66	

   ,//11+> 0  r1   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r   c                  *    d j                          S )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r(  s   r/   rQ   z$upsample_nearest3d.<locals>.<lambda>  r  r1   r   r  r   r  )r   r  scales_dr  r  r  s   `     r/   upsample_nearest3dr"    r  r1   c                    t        j                  |       t        j                  | t         j                        }}||t        |t              sJ t        |t              sJ |j
                  }|j                         }	t        ||      }t        ||      }|j                  ||	       |j                  ||	       t        ||       t        ||       ||fS ||fS )Nr{   )r^  r_  )
rB   r   r   r\   r   r   r   r   r  r!   )
r   stablerq   
descendingr   r   r  r   r   
out_strides
             r/   	meta_sortr'    s     D!5#3#3D#LqAg1&*---':... GG	XXZ
"695#GY79j1Iz2F3G4wa4Kr1   c           	          t        j                   j                  dk(   fd       t        j                   j                  j                  k(   fd        j	                  d      t        j                  j                  dk(  fd       t        j                  j                         k(  fd       t        j                  j                  j                  k(  fd       t        j                  j                  dk(  fd        j	                  d	      z  z  t        j                  j                         k(   fd
       t        j                  t         fdfD              d        y )Nr   c                  "     j                    dS Nz != 2r  )input_gatess   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>+      ;3C3C2DE0J r1   c                  :    j                    d j                    S N != r  )hidden_gatesr+  s   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>.  s     ;$$%T,*<*<)=> r1   r   c                  "     j                    dS )Nz != 1r  )
input_biass   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>2  s    joo5Fe3L r1   c                  .    j                          d  S r.  r  )
gates_sizer2  s   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>5  s    z'')*$zl; r1   c                  :    j                    d j                    S r.  r  )hidden_biasr2  s   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>9  s     z''([->->,?@ r1   c                  "     j                    dS r*  r  )prev_hiddens   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>;  r,  r1   r   c            
      `    j                          dj                  d       d d d  d
S )Nr/  r   z * z // z (aka re   )r   r   )expected_prev_hidden_numelfactorr4  r+  r8  s   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>?  sB    ;$$&'tK,<,<Q,?+@J<tTZS[[ab|a}}~ r1   c              3   P   K   | ]  }|j                   j                   k(    y wr,   rv  )r^   r?   r+  s     r/   r`   z&rnn_cell_checkSizes.<locals>.<genexpr>B  s(      
 HH***
s   #&c                       y)Nz%expected all inputs to be same devicer5   r5   r1   r/   rQ   z%rnn_cell_checkSizes.<locals>.<lambda>F  rY   r1   )rB   rS   r   r   r   r   rb  )r+  r0  r2  r6  r;  r8  r:  r4  s   ``````@@r/   rnn_cell_checkSizesr>  #  s@    
LL!!Q&(JK	LL\///> !!!$JZ__)+LM*,;	
 	 1 11@	
 
LL!!Q&(JK!,!1!1!!4z!AV!K	LL99 
LL 
"J[I
 	
 	8r1   c                 
   t        | |||d|       t        j                  | t        j                        }t        j                  |t        j                        }t        j                  |t        j                        }|||fS )Nr  r   )r>  rB   r   r   )r+  r0  cxr2  r6  	workspacehycys           r/   _thnn_fused_lstm_cell_metarD  J  sk     \:{ArR  E<S<STI			"E,C,C	DB			"E,C,C	DBIr1   c                 b   t        |      dk7  }|r t        |      }|d   }| j                  d   }nB|
r| j                  d   n| j                  d   }|
r| j                  d   n| j                  d   }d}|rdnd}|dk7  r|n|}|r|||z  g}n|
r||||z  gn||||z  g}| j                  |      }|	|z  ||g}|"t        j                  d| j
                        }n|j                  |      }|j                  |	|z  ||g      }|rdnd}| j                  |t        j                        }|||||fS )Nr   r   r   r   rv  r{   )r   r   r   rB   rv   rn   r  )r   r  weight_stride0
weight_bufhxr@  r*  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r   
cell_shaperC  rB  reserve_shapereserves                                r/   
_cudnn_rnnr[  Y  sS   & +&!+O%
 ^
++a.'2U[[^A
'2U[[^A
'QQN%NyH$h&?@	  X%>?j(^*CD 	
 __Y'F~-z;GJ	z[[5<<0\\*%	zN2JI	JB AAMoom5;;o?G2r7J..r1   c                 (   |r| j                   d   n| j                   d   }|r| j                   d   n| j                   d   }|
}|r|||gn|||g}| j                  |      }|"t        j                  d| j                        }n|j                  |j                         }|"t        j                  d| j                        }n|j                  |j                         }t        j                  d| j                  t        j
                        }||||fS )Nr   r   rv  r   )r   r   rB   rv   rn   r  )r   w0w1w2w3hx_cx_r   rP  r*  rI  rK  
has_biasesrO  rL  rN  rS  rT  output_chanelsr   r   rB  rC  rA  s                           r/   mkldnn_rnn_layerre    s    & $/QEKKNJ#.QEKKNJ N  
Z0*n5 
 __Y'F
{[[5<<0]]399%
{[[5<<0]]399%Aell%++FI2r9$$r1   c                     | j                   dk(  r%t        j                  dk(  xs dk(  fd       y t        j                  | j                        dk7  fd       y )Nr   r   c                       d  S )Nz4: Expected reduction dim -1 or 0 for scalar but got r5   rq   r  s   r/   rQ   z'zero_numel_check_dims.<locals>.<lambda>  s    wiSTWSXY r1   c                       d  dS )Nz: Expected reduction dim z to have non-zero size.r5   rh  s   r/   rQ   z'zero_numel_check_dims.<locals>.<lambda>  s    wi8=TU r1   )r   rB   r   r   )r   rq   r  s    ``r/   zero_numel_check_dimsrj    sR    yyA~1H!r	Y	

 	IIcNaU	
r1   c                      |(t        ||j                               }t        ||        y t        j                  |j                         dk7   fd       y )Nr   c                        dS )Nz@: Expected reduction dim to be specified for input.numel() == 0.r5   r  s   r/   rQ   z%check_argmax_argmin.<locals>.<lambda>  s    tf\] r1   )r   rq   rj  rB   rS   r   )r  r   rq   s   `  r/   check_argmax_argminrm    sC    
S$((*-dC.JJLA]	
r1   c                     t        d| |       t        j                  | j                  ||fnd       }t	        | ||      }| j                  |t        j                        S )Nargmaxr{   )rm  r:   rW  r   rX  r   rB   r   )r   rq   rZ  rR  r   s        r/   argmax_argmin_metarp    sQ    $,

coSF4PD$T49E>>%u{{>33r1   c                 |    |t         j                  k(  rt         j                  }t        j                  d||||      S )Nr5   r   )rB   jaggedr}  rv   )rr  rJ   rm   rn   ro   s        r/   scalar_tensorrs    s5    
 ;;
%v* r1   c                 |   t        || j                         d      }| j                         dk(  rdn| j                  |      }t        j                  |dk\  xr ||k  d        t        | j                        }t        |      dkD  r|||<   | j                  |      | j                  |t        j                        fS )NTr&  r   r   c                       y)Nzk not in range for dimensionr5   r5   r1   r/   rQ   ztopk_meta.<locals>.<lambda>  rY   r1   r{   )
r   rq   r   rB   rS   r   r   r   r   r   )r   r1  rq   largestsorted	sliceSizetopKSizes          r/   	topk_metar{    s     dhhjd
;CXXZ1_$))C.I	LLa*AN,RSDJJH
8}q>>(#T^^HEKK^%PPPr1   c                     |	|J d       |j                         }| j                         }	t        j                  ||	j                  |	j                  |	j
                        S )Nz;segment_reduce(): Either lengths or offsets must be defined)rJ   rn   rm   )r   rB   r   rJ   rn   rm   )
r  r   rG  r7  rB  rC  rD  rF  data_contiggrad_contigs
             r/   meta__segment_reduce_backwardr    sj     	w2EDE2//#K//#K!!!!	 r1   c                    t        | j                         d      | j                         dkD  r| j                        nd}t        j                  |dk\  xr ||k  fd       t        | j                  d  | j                  dz   d  z         }|r%| j                         dkD  r|j                  d       | j                  |      | j                  |t        j                        fS )NTru  r   r   c                      d  S )Nz9kthvalue(): selected number k out of range for dimension r5   r   s   r/   rQ   zkthvalue_meta.<locals>.<lambda>  s    KC5Q r1   r{   )
r   rq   r   rB   rS   r   r   r   r   r   )r   r1  rq   rZ  dimSizer   s     `   r/   kthvalue_metar    s     dhhjd
;C $
QdiinAG	LL	Q1<Q
 DS!DJJsQwy$99:E488:>S!>>% $..ekk."JJJr1   c                    | | n|}t        j                  |j                         dk(  d        |j                         }| (t        j                  | j                         |k(  d        |(t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         dk(  d        t        j                  |j	                         |d   |d	   z  d
z  k(  d        y )Nr   c                       yN r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>   rY   r1   c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>!  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>"  rY   r1   c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>#  rY   r1   r   r   r  c                       yr  r5   r5   r1   r/   rQ   z(checkLSTMBackwardSizes.<locals>.<lambda>$  rY   r1   )rB   rS   rq   r   r   )grad_hygrad_cyr@  rC  rA  defined_gradexp_sizes          r/   checkLSTMBackwardSizesr    s    %17wL	LL!!#q(*5  "HW\\^x/<W\\^x/<	LLh&
3	LLh&
3	LLA%z2	LL"hqkHQK&?!&CCZPr1   c                     | |yt        | ||||       t        j                  |t              }t        j                  |t              }|r|j	                  dd      nd }|||fS )NNNNr   r   F)rZ  )r  rB   r   legacy_contiguous_memory_formatr9  )	r  r  r@  rC  rA  has_bias
grad_gatesgrad_cxr  s	            r/   #_thnn_fused_lstm_cell_backward_implr  (  sl    7?7GRY?!!!@J r1PQG4<
q%0$Iw	))r1   c                    d }d }d }|d   r|j                  | j                               }|d   s|d   rQ|j                  |j                  d      | j                  d      f      }|j                  |j                  d            }|||fS )Nr   r   r   r   rd  )rf  re  rg  rj  r  grad_weightr  s          r/   linear_backwardr  6  s    JKI1~!++FKKM:
1~Q",,l.?.?.CV[[QS_-UV **<+<+<R+@A	Y//r1   c                     t         j                        dkD  r j                  d   ||z  z  dk(  sJ d j                   d|        d  fd} j                  d   ||z  z  } j                  d   |z  } j                  d	   |z  }g  j                  d d |||} j                  |      }|j                   |       
      }|S )Nr   r  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 b    t         j                  j                  |       t         j                  k(  S r,   rj  rn  s    r/   rp  z,meta_pixel_shuffle.<locals>.is_channels_lastI  s$    ""88=ATATTTr1   c                  2           r.t              dk(  rt        j                  S t        j                  S j	                  t        j                        rt        j                  S j	                  t        j
                        rt        j
                  S y r  )rC  rB   r   rm  r.  r  )rp  r   s   r/   rx  z.meta_pixel_shuffle.<locals>.pick_memory_formatL  s|    D!4 F*...***e.E.EF***e.C.CD((( Er1   r  r   r   )r   r   r   r   )	r   upscale_factorrx  r  HrWrr   r   rp  s	   `       @r/   meta_pixel_shuffler  C  s     	DJJ!

2.>2Q RVW We	0<STbScdeWU	) 	

2>N:;A	B.	(B	B.	(B-$**Sb/-1-b-"-I
..
#C
&&13&
4CJr1   c                 X   | j                  | j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|||||||fS r,   r  )r   weight0weight1weight2weight3ra  cx_tmpr   hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r*  rI  rK  rc  rN  rO  rP  rL  rA  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs                                r/   mkldnn_rnn_layer_backwardr  a  s    4 __U[[)FmmCII&Gv||,G.G.Gw}}-F7GVVWgEEr1   )	out_int32r   c                    t        j                  | |rt         j                  nt         j                        j	                         S rU  )rB   r   r  r   r   )r   
boundariesr  r   s       r/   meta_bucketizer    s/     9EKK%++jlr1   c                 0    dt               dk(  r't        j                   j                          fd       t        j                  t	        t
              fd       t        j                  dkD  fd       t        j                  t	        t              fd       t        j                  t	        t              fd       t        j                  k\  d	        t        j                   j                   j                  
      S )Nzhistc()r  c                  $    d j                    dS )Nz%"histogram_cpu" not implemented for ''r{   r(  s   r/   rQ   zmeta_histc.<locals>.<lambda>  s    =ekk]!L r1   c                  $     dt                S )Nz#: argument 'bins' must be int, not r|  binsr  s   r/   rQ   zmeta_histc.<locals>.<lambda>  s    7)>tDzlK r1   r   c                       d  S )Nz: bins must be > 0, but got r5   r  s   r/   rQ   zmeta_histc.<locals>.<lambda>  s    gY.J4&#Q r1   c                  $      dt               S )Nz%: argument 'min' must be Number, not r|  )r  r|  s   r/   rQ   zmeta_histc.<locals>.<lambda>      7)@cL r1   c                  $      dt               S )Nz%: argument 'max' must be Number, not r|  )r  r}  s   r/   rQ   zmeta_histc.<locals>.<lambda>  r  r1   c                       y)Nz&{fn_name}: max must be larger than minr5   r5   r1   r/   rQ   zmeta_histc.<locals>.<lambda>  rY   r1   r   )
rC  rB   rS   r   r\   r   r   rv   rn   rJ   )r   r  r|  r}  r  s   ````@r/   
meta_histcr    s     G5U"##%L	
 
LL4!K 
LLQR	LL3L 
LL3L 
LLMN;;tELLDDr1   c                 B    t         j                         |d      }t        j                   j	                         dk7  xs# t        d  j                         dd  D               fd        j                  |      j                  t        j                               S )Nr   r  r   c              3   &   K   | ]	  }|d kD    ywr  r5   )r^   r   s     r/   r`   z,meta_upsample_bimode2d_aa.<locals>.<genexpr>  s     !Ht$(!Hr  r   c                  *    d j                          S r  r   r(  s   r/   rQ   z+meta_upsample_bimode2d_aa.<locals>.<lambda>  r  r1   r   )
r
  r   rB   rS   r   rb  r   r   r:   rl  )r   r  r  r  r  r  s   `     r/   meta_upsample_bimode2d_aar    s     -

kA 
LLHc!Huzz|AB7G!HH_ ??+,//11%8 0  r1   c                 P   t        j                  |j                         dk(  d        t        j                  |j                         dk(  d        t        j                  |j                  j                  d        t        j                  |j                  j                  d        y )Nr   c                       y)Nz%found_inf must be a 1-element tensor.r5   r5   r1   r/   rQ   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rY   r1   c                       y)Nz%inv_scale must be a 1-element tensor.r5   r5   r1   r/   rQ   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rY   r1   c                       y)Nz!found_inf must be a float tensor.r5   r5   r1   r/   rQ   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rY   r1   c                       y)Nz!inv_scale must be a float tensor.r5   r5   r1   r/   rQ   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rY   r1   )rB   rS   r   rJ   r   )r   rx  	inv_scales      r/   *_amp_foreach_non_finite_check_and_unscale_r    s|    	LLQ O 
LLQ O 
LL))3 
LL))3r1   c                 V    t        | j                               }| j                  |      S r,   )r   r   r   )r   nanposinfneginfr@  s        r/   
nan_to_numr    s#     tyy{#K>>+&&r1   c                    | j                   t        j                  t        j                  t        j                  t        j
                  hvsJ d| j                    d       | j                  }t        ||      }t        ||      }||k(  r| S t        | j                               }t        | j                               }||   ||   c||<   ||<   ||   ||   c||<   ||<   | j                  ||       | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)rm   rB   r~  
sparse_cscr  
sparse_bscr   r   r   r   r   r  )r   dim0r#  ndimsr   r   s         r/   rr  rr    s     		
	
] 
H}T[\]	
 IIE$&D$&Dt|		D$++- F!'vd|F4L&,!$ZdDJT
T6"Kr1   c                    | j                   }| j                  r8| j                         }| j                         }|dk  r|dk(  s,J d| d| d       | j	                         dk  sJ d| d       t        | d|dk  rd      S d      S )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is r  r   )r   r  r  r  rq   rr  )r   r  r  r  s       r/   t_r    s    IIE~~__&
NN$	!O	Q	HRS]R^^jktju  vG  H	H. HHJ!O	MCE7!L	M dAEAIq55155r1   )r  r   sidesorterc                Z    t        j                  t         j                        dk  xs  j                  d d j                  d d k(   fd       t        j                  d u xs  j                  j                  k(   fd       t        j                  |dk7  xs | d       |rt         j                  nt         j
                  }t        t         j                        r%t        j                  |      j                         S t        j                  d| j                  	      S )
Nr   r   c                  `    dt        j                         dt         j                         S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r   )r   sorted_sequences   r/   rQ   z#meta_searchsorted.<locals>.<lambda>  s8    3378M8M3N2O P""&tzz"2!35 r1   c                  l    dt         j                         dt        j                         S g  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r  )r  r  s   r/   rQ   z#meta_searchsorted.<locals>.<lambda>"  sO    ##'(=(=#>"??V%+%7tFLL!@B  >@@B r1   r   zetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer{   r5   r  )rB   rS   r   r   r  r   r\   r   r   r   rv   rn   )r  r   r  r   r  r  rJ   s   ``   ` r/   meta_searchsortedr    s     
LLO!!"a' 	9  "%CR8	
	 
LL$?///6<<?	
 
LL#e)	$ %EKK%++E$%E2==??{{2U?3I3IJJr1   c                      t        j                   t         j                  t         j                  t         j                  fv fd       y )Nc                      d  S )Nz/Unsupported input type encountered for isin(): r5   r{   s   r/   rQ   z3_check_for_unsupported_isin_dtype.<locals>.<lambda>:  s    A%I r1   )rB   rS   r7  
complex128	complex64r{   s   `r/   !_check_for_unsupported_isin_dtyper  7  s/    	LLejj%"2"2EOODDIr1   c                 j    |	rt         j                  | ||||||||
|
      S t        | ||||||||
|
      S r,   )r&   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)r  r   rC  r  r	  maximum_indicesnum_weightsr  r*  r  r  r  s               r/   meta_embedding_bag_backwardr  >  se     22
 	
 1
 	
r1   c
                 d    t        j                   j                  t         j                  t         j                  t         j
                  t         j                  fv  fd       t        d      \  }
}}||k(  rt        j                  |d u        j                  | j                  d      f      }|S )Nc                  "    d j                    S )Nz$Unsupported input type encountered: r{   )r  s   r/   rQ   z3meta_embedding_bag_dense_backward.<locals>.<lambda>x  s    6tzzlC r1   r   r   )
rB   rS   rJ   r+  r,  r*  float64r   r   r   )r  r   r  r	  r  r  r  r*  r  r  r  r  r  index_grad_weights   `             r/   r  r  i  s     
LL

u}}ennemmU]]SSC %*!H!Hix_D01TYYq\'BCr1   c                    t        d      \  }}}	| j                  d      }
t        j                  ||k(  d       t        j                  | j	                         dk(         t        j                  |j	                         dk(         |j                  d      }t        j                  |j	                         dk(         t        j                  |j                  d      |
k(         | j                  |f      }|S )Nr   r   zHembedding_bag_backward: per_sample_weights only supported for mode='sum'r   r   )r   r   rB   rS   rq   r   )r  r  r   rC  r  r*  r  r  r  r  embedding_featuresr  r   s                r/   .meta_embedding_bag_per_sample_weights_backwardr    s     %*!H!Hi1	LLR 
LLq!	LL!#$,,q/K	LL"#	LLQ#556^^[N+FMr1   )assume_uniqueinvertc                   t        j                  t        | t              xs t        |t              d        t        | t              s!t        j                  | |j
                        } t        |t              s!t        j                  || j
                        }t        | j                         t        |j                         t        j                  | t         j                        S )Nc                       y)Nz<At least one of elements and test_elements must be a Tensor.r5   r5   r1   r/   rQ   zmeta_isin.<locals>.<lambda>  rY   r1   rv  r{   )
rB   rS   r\   r   r  rn   r  rJ   r   r7  )elementstest_elementsr  r  s       r/   	meta_isinr    s     
LL8V$I
=&(IN h'<<1E1EFmV,]8??K%hnn5%m&9&9:HEJJ77r1   r   c                     t        j                  | dk\  d        t        |t        j                        \  }}t        j
                  ||      S )Nr   c                       y)Nz,polygamma(n, x) does not support negative n.r5   r5   r1   r/   rQ   z meta_polygamma.<locals>.<lambda>  rY   r1   rb  r{   )rB   rS   r   r   rc  r   )r   r   r=   r>   s       r/   meta_polygammar     sF     
LLaOP(;HHOA| D55r1   c                     t        d      )Nz.Tensor.item() cannot be called on meta tensors)r   r   s    r/   meta_local_scalar_denser    s    
G
HHr1   max_lengthspadding_valuec                     t        |      dk(  sJ t        |      dk(  sJ |d   j                  d   dz
  }|d   }||g| j                  dd  }| j                  |      S r  )r   r   r   )r   rC  r  r  r  rH  r3  s          r/   $meta__jagged_to_padded_dense_forwardr    sv     w<1{q   
aAAAq,6<<+,LL))r1   c                 B    t        |       t               d               }|S )Nc                 8    t        | t        j                        S r2  r@   r   rc  r  s    r/   _fz)_create_unary_float_meta_func.<locals>._f  s      =JJ
 	
r1   r6   r"   funcr
  s     r/   _create_unary_float_meta_funcr    *    4]
  

 Ir1   c                 B    t        |       t               d               }|S )Nc                 :    t        | |t        j                        S r2  r	  )r?   r  s     r/   r
  z*_create_binary_float_meta_func.<locals>._f  s      q!@!M!M
 	
r1   r  r  s     r/   _create_binary_float_meta_funcr    r  r1   c                      t                fd       } j                   d}||_         t        t        t        |            |      }|S )Nc                 `     | g|i |}t        | j                  |j                         | S r,   r_  )r   r<   r5  r   r.   s       r/   _fnz#_register_inplace_meta.<locals>._fn  s.    '''

CII6r1   r=   )r   rg   r6   getattrr&   )r.   r  inplace_names   `  r/   _register_inplace_metar    sO    
2Y 
 kk]!$LCL
4-l3
4S
9CJr1   c                 H    t        j                   j                  j                  k(   fd        g}t        t              rBt        j                   j                  j                  k(   fd       |j                         t        |dt        j                  iS )Nc                  <    dj                    d j                    S )Nr  z for `end`, but got dtype r{   )rh   ri   s   r/   rQ   zlerp.<locals>.<lambda>  s    /%++.HT r1   c                  <    d j                    dj                    S )Nr  z for `weight`, but got dtype r{   )ri   r  s   r/   rQ   zlerp.<locals>.<lambda>  s     oekk]2OPVP\P\~^ r1   r7   )	rB   rS   rJ   r\   r   r   r@   r   r;   )ri   rh   r  r<   s   ``` r/   lerpr  
  s     
LLsyy T 3<D&*%KK6<<'^	
 	F	=EE r1   )r`  c                <    t        | ||t        j                        S r2  r4  r   tensor1tensor2r`  s       r/   addcmulr!    s!     w0O0W0W r1   c                    t        j                  t        j                  |j                        xr t        j                  |j                         d        t        | ||t        j                        S )Nc                       y)N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r5   r5   r1   r/   rQ   zaddcdiv.<locals>.<lambda>-  rY   r1   r3  )rB   rS   r:   r  rJ   r@   r   r;   r  s       r/   addcdivr$  %  sb     
LL""7==1 6&&w}}5	
		
  w0O0W0W r1   c                     i } dD ]  }t         |   }|D ]  }|| vs||   | |<    ! | j                         D ]  \  }}t        |t        j                  j
                        r,t        |t              sJ  |j                  t        j                  j                  j                        |       t        j                  j                  |j                         d      r|t         d   v st        | d      |j                  r|j                         dv rd|j                         v rt        j!                  ||       	d|j                         v rt"        j!                  ||       3d|j                         v rt$        j!                  ||       ]d	|j                         v rt&        j!                  ||       t(        j!                  ||        y )
N)rl   post_autogradpre_autogradCompositeImplicitAutogradrl   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsr\   rB   _opsHigherOrderOperatorr   py_impl_CDispatchKeyr(   %_dispatch_has_kernel_for_dispatch_keyr  r   is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tablerf   registryopoop_overloadr.   s         r/   activate_metarB  I  s    : 9-d3 	9C--+3C=#C(	99 /446 6NR
 k5::#A#AB+z2226EHH00556r:8899 ;
 8@@""m $; ; 
    	 [--//BGGUWXK,,..?DD[RTU{//11BGGUWX+"2"2"44EJJ 8<<["Mm6Nr1   )Fr  r,   )NNNFr   r   F)NN)Tr  )r  )r  T)FF)TT)r,  )FTN)TFF)TF)r   )g      ?N)r8  str)r5   ry  r  F)r5   ry  FTN)Fr   FNFr   )NF)r   F)g      ?gUUUUUU?FN)NNNNN)r   NNr   )NNF)        FFN)rD  FNN)rD  FN)FN)FNNNN)NNNF)Nr   FNN)NNNN)r   TT)NNr   N)d   r   r   )r   )rD  (]  r  enumr   	functoolsr   typingr   r   r   r   r	   rB   torch._prims_commonrk  r:   r
   r   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr   r   r    r!   r"   rY  r#   r$   torch.utilsr%   r2   opsr&   libraryLibraryr=  r6   r@   rL   rU   linspacelogspacer}  rx   taker   r   r   r   r   cummaxcumminr   r   r   _fft_c2cr   _fft_r2cr   randpermgenerator_outr   r   r   randintr   r   low_outr   randr   _fft_c2rr   rg  r   r  
unsqueeze_r
  _sparse_semi_structured_linearrC  rJ   r  _sparse_semi_structured_mmr  _sparse_semi_structured_addmmr  _cslt_sparse_mmr7  r  r4  index_reducer<  index_reduce_r>  index_selectrA  segment_reducerN  r}  	unary_outrR  rq   r[  r|  r^  r`  rh  rd  ri  _assert_asyncrl  msgrp  _printrs  _make_dep_tokenrw  r  _functional_sym_constrain_ranger  r  (_functional_sym_constrain_range_for_sizer  _functional_assert_asyncr  r   r  r   r  r  r  r  _linalg_eighr  r  _linalg_eigvalslinalg_eigvalsr  
linalg_eigr  r  r  r  r  r  r  r  linalg_inv_exr  linalg_ldl_factor_exr  linalg_ldl_solver  	linalg_lur  linalg_lu_factor_exr  linalg_lu_solver#  	lu_unpackr)  r2  	linalg_qrr9  r=  r:  _linalg_svdrI  r  r  rY  rm  linalg_solve_triangularru  r{  r  _linalg_detr  r  r  r  reflection_pad1dr  replication_pad1dr  r  reflection_pad1d_backwardr  replication_pad1d_backwardr  r  reflection_pad2dr  replication_pad2dr  reflection_pad2d_backwardr  replication_pad2d_backwardr  r  reflection_pad3dr   replication_pad3dr  reflection_pad3d_backwardreplication_pad3d_backwardr	  _pdist_forwardrF   r  _pdist_backwardr  baddbmmr'  	bernoullir+  
bernoulli_r.  r
  r1  poissonr4  _fused_moving_avg_obs_fq_helperrE  mmrO  rX  rC  rh  rp  miopen_batch_normr~  convolutionr  r4  _has_mkldnnr8  r  _convolution_pointwiser  _linear_pointwiser  has_mklr:  r  _mkl_linearr  r;  r  qconv2d_pointwiser  qlinear_pointwiser  r  linear_dynamic_fp16linear_relu_dynamic_fp16r  r<  r  
max_pool2dr  r  
avg_pool2dr  r  avg_pool2d_backwardr  
avg_pool3dr   avg_pool3d_backwardr  _adaptive_avg_pool2dr  _adaptive_avg_pool3dr  _adaptive_avg_pool2d_backwardr  _adaptive_avg_pool3d_backwardr"  r   adaptive_max_pool2dr3  r7  r9  adaptive_max_pool3dr@  rB  rC  repeat_interleaverF  r]   rJ  rM  r}   _unsafe_indexrb  convolution_backwardrn  addbmmrv  _fused_adam__fused_adamw_r  _fused_adamr  _int_mmr  _convert_weight_to_int4packr  #_convert_weight_to_int4pack_for_cpur  _weight_int4pack_mmr  _weight_int4pack_mm_for_cpur  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr  r  nansumr  median	nanmedianr  
dim_valuesr*  r   r  logical_not_r  repeatr!  zero_r#  mul_Scalardiv_logical_and_logical_or_logical_xor_r&  add_sub_r0  rounddecimalsr6  r<  
__rshift__r@  
__lshift__rD  zerorF  r  rI  fillrK  relu_rN  	_add_relurP  rrelu_with_noiserU  rrelu_with_noise_functionalrW  rrelu_with_noise_rY  	index_put_unsafe_index_putr]  masked_fill_ra  _masked_scalerd  masked_scatter_rh  masked_scatterrj  masked_scatter_backwardrl  
index_put_rn  aliasrq  r|  bmmr~  r  r  r  r  r  r  r  r   max_pool2d_with_indices_backwardr  max_pool2d_with_indicesr  fractional_max_pool2dr  max_pool3d_with_indicesr   max_pool3d_with_indices_backwardr  r  r  r  grid_sampler_2d_backwardr  r  r  r
  r  rX  r  select_scatterr   slice_scatterr$  r   r)  r1  gatherr7  rH  rO  rQ  r[  r^  scatter_addrc  scatter_add_rf  r]  r   r`  r7  value_reducerj  scatter_rl  #_scaled_dot_product_flash_attentionr  #_scaled_dot_product_cudnn_attentionr  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  '_scaled_dot_product_efficient_attentionr  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_cudnn_attention_backwardr  _flash_attention_forwardr  _flash_attention_backwardr  _efficient_attention_forwardr  _efficient_attention_backwardSymIntr  
_scaled_mmr  scatter_reducetwotwo_outr  scatter_reduce_r  multinomialr  r  r
  r  _upsample_nearest_exact1dr  _upsample_nearest_exact2dr  "_upsample_nearest_exact2d_backwardr"  _upsample_nearest_exact3dr   r$  values_stabler'  r>  _thnn_fused_lstm_cellrD  r[  re  rj  rm  ro  argminrp  rs  topkr{  _segment_reduce_backwardr  kthvaluer  r   r  r  r  r  pixel_shuffler  r  	bucketize
Tensor_outr  histcr  _upsample_bilinear2d_aa_upsample_bicubic2d_aar  r  r  rr  r  searchsortedr  r  _embedding_bag_backwardr  _embedding_bag_dense_backwardr  *_embedding_bag_per_sample_weights_backwardr  isinr  	polygammar   _local_scalar_denser  _jagged_to_padded_dense_forwardr  r  r  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr  r  r!  r$  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialrB  r5   r1   r/   <module>r0	     sq@      9 9  # + +  " U    < ) yy~~*/--*?*?PV*W '
3(* t}}-.
 

==5  /5p 		!!499==12'  3' !!))4+<+<+@+@AB%' %  C%$ t%%&I  'I 	[[$++//4;;+>+>P Xy! " !!))4+<+<+@+@AB/  C/1Hl %%t}}'8'89:  ;" %%t}}'8'89:  ; t}}**+"& 3 ,3 t}}$$% **
 &
 $$dll&6&678
 **  9   $,,"6"678 **  9 		!!499==12%)$tPT   3 %%t}}'8'89:K  ;K tzz!!" #0( t&&' ( t223
 "%)'+  6
	
 c] $ 4B t../
 (,	
  $	 00 t112 	
'+
  	 $ 3D t##$ ""'+"$/<,,/<\\/< 6
/< F	/<
 $/< /< /< /< /< %/<d t  (() 	I
	I		I 	I LL		I
 	I 	I 	I *	I t!!))* 	
			 	 LL		
 	 	 	 +	 t  (()' * ' t""**+
 !% $ $ W
 W W f	 W
 f W f W  W  W  W , WF   $(("4"456  7 txx||    $(("4"456  7 txx||  tzz!!"6 #6 tzz~~( (
 t!!))* + t!!%%& ' t{{""# $ t##++, ) -) t''//0, 1, t33;;< =
 t008896 :6& t<<DDE F
 t,,001 2
sF sC s   F  #  N (,


 !%
$V S C 
 
F 
$ 
 
"  	  	C  !!))4+<+<+H+HIJ]N+ s T  , K" $$,,d.A.A.E.EFGB B6 B  HB  !]N+	6 	 , "	Q QF Q t**+) )F )4 )F )  ,) t""#J JF J4 JF J  $J t}})6 )$ )6 )  ) t$$%)6 )$ )6 )  &) t&&../&  T  0" 	$$,,d.M.M.Q.QR .f .6 .f . .d t!!))*&   + ))1143L3L3P3PQRT8V$ 	
  	
 666!" % S& %%--t/D/D/H/HIJ ''' '
 ' '  K'T &&(:(:;<S#s/3 f  fff>T8U   =4 ((00$2J2J2N2NOPT8V$ 	  	
 666!" % QD $$,,d.B.B.F.FGH 444 4
 4 4 4  I4n t~~S#s 	$$$ $ 	$
 666!"$  $P tTz!2 * &&(:(:;<S#f C ffn8M   =4 $$,,d.B.B.G.GHIV[$1'v '%(F"G ' 2 J'$ t''(   	""" " SM	" )"J.
.
. 49d3i .".
.
. 3-. 66>	.(f V   t$$%
 ##!888 	8
 8 V8 	8 V8 6
8 6666)*8 &8v ,,44d6R6R6V6VWX   	
   
&	  Y2 t$$%S#4( +(
+(+( +( 	+(
 +( 66>+( ) &+(^ t''(
 )
 tzz
 WW	W W 	W
 W W  Wt>#;L t$$%=  &= t%%&>  '>(< t--.\S  /S t../\T  0T2Ej t$$%=  &= t%%&>  '> &&..&&11''//''22	 \& &@<G~ t$$%=  &= t%%&>  '> &&..&&11''//''22	 \$( $(N t""#

f 

 

v 

  $

 t##$Pv PV P Pf PQW P  %P $$dll&6&678/0 '  9'2 &&(:(:;<&* /  =/
 t$$% & t~~ / !/
 $$dll&6&678"  9" t33;;< * =*. tww	  	B
* 7;b,,bLLb $s)S.!b 49c>"	b
 DIsN#b b b U49c>23bJQ t%%--."$,,"$LL"$ 5<<
 "$ 5<<(	"$
 %,,'"$ "$ !&"$ "$ /"$J t''(),,)LL) ,,) I	)
 #Y) 3i) ) I) ) ))X 	889>9N9N&&:6 599##::BBC D, 599##55==>S ?S
 xx:?--:O:O66;
7 
uyy}}00	1	 
2	
 :?9N9N&&:6 599##55==>  ? D 599##55==>599##55<<= > ?, 599##77??@599##<<DDE	 F A	 =BMM<Q<QVV=9 599&&112 
 3
< t&&' I (IZ(<X t''//0E 1EP t QJ   QJh t''(\K(  )K(\ t((001 2" t((001@ 2@ t1199:F ;F, t112\P  3P
	
6 	
S 	
 t''(UI+  )+\ t001\H  2H$ t''(UI'  )'T t001\(  2(
 t%%,,-* .* $$dll&6&678T  9T ##++T-@-@-D-DEF9; @# @3 @  G@ 

!!4#5#5#<#<=>`J ?`JF ))1123H 4H: ##T[[__56./q '  7'0 !!))4+=+=+E+EFG  !
 H
2   (()*  !! +!H ~B  B* 0012 3& 889: ; (()*@ +@ 0012< 3< (()*< +< t""**+ & , &F t##$G  %G* t""**+
 	
a5 ,a5H t//7785 95
 ##T[[__56=$ =  7= ##T^^%;%;<=) >) !!					 Xy! "	 t  (() * t{{""#' $' tzz!!" # 								!!  !!

 									**Z 

""DJJ$7$789 :
" &&(>(>?@ A &&(>(>?@ A tyy  !& "& 

!!4::#4#456 7 		  $))"2"234" 5" tzz!!" # t~~$$%F   & %%&'RV"  (" 0012RV; 3; &&'(KO ) &&(>(>(F(FGH" I" t  ''( )
 t!!))* + t##$	 %	 t""#6  $6 t++,! -! t&&' ( tzz!!"! #!B txx 5 !56;h #-ZZZ 	Z 		Z
 	Z 	Z 	Z 	Z 	Z 	Z 	Z Z Z Z Z  !Z" #Z$ %Z& 'Z( )Z* +Z, -Zz;4|383838 38 		38
 	38 	38 	38 	38 	38 	38 	38 	38 38 38 38  !38" #38$ %38& '38lI2X t44<<=( >(V t++334 # 5#L t))112Q 3Qh t++,UI d  -dN t445\b  6bJ%
V %
6 %
Pt  v 3 $ t,,445# 6#$ t##$8  %8" t,,-\;'! ( .!, 		!!"#. $. t&&' ) ()X t{{E  E@ t""**+. ,. t!!))*. +.
	 	C 	d 	/
  t{{""#' $'6
 
-
b4 t''(& )&
 t  ! "
 !!	 & & ""	 889:
 #!55	5 5 	5
 5 5 E?5 ;5p 889: #!''	' ' 	'
 ' ' ' ' E?' ;'T 99( """" 
" 	"
 
" " " " " " " " " " E?"
". 88 "&!	  	
   E?
< AA #'!'"'"'" 
'" 	'"
 
'" '" '" '" '" E?'"
'"T <<=> !$)$)	$) $) 	$)
 $) $) E?$) ?$)N ==" !4-4-4- 
4- 	4-
 4- 
4- 4- 4- 4- 4- $Z4- 4- E?4-
4-n 99* "!""" 
" 	"
 
" " " " " " " " " " "  E?!"
"0 %%  "&*'+"&%)99	9 9 	9
 9 9 9 9 9 9 E?9 sm9  }9 9 6"9
9x &&( "&*'+#,,, 
, 	,
 
, , , , , , , , , , E?,  sm!,"  }#,
,4 ))   %!(,!%!%-S-S	-S -S 6
	-S
 6"-S 6"-S 3--S 3--S -S -S -S E?-S f%-S v-S #-S
-S` *** "$("'%474747 
47 	47
 6
47 6"47 6"47 ,,47 ,,47 47 47 47 47 47 47  E?!47" SM#47$  %47
47n ''() $(+/'+ aY
,,aY
,,aY \\aY \\	aY
 5<<
 aY 5<<(aY $aY aY *aYH ##'')<)<)D)DEF&  G&
 t##''( )
   (($*:*:*>*>?@	 	  A	,* 	$$d&D&D&L&LM

 	$$d&D&D&L&LM. ((00//77 !% $%U\\ 123 sELL012 uo	
 uo: 	$$d&D&D&L&LM

 									&$N t))112
  3 t&&'4/ (4/n t$$,,-$% .$%N


 ##T[[%8%89:4 ;4 t!!))* + tyy  !	Q "	Q t,,-LP  .  %%t}}';';<=Xy!K " >K #("9"9 Q t77??@	* A	* t##++,	0 -	0 t!!))* +: t--556F 7FD %%t~~'@'@AB27u   C 

|E  E0 	!!))4+F+F+N+NO ( t>>FFG H$ '')<)<=>'  ?'
 uyy~~(() *: uyy~~  !6 "6" t  !
 
	+K  "+K\ t++, '
 -'
T t112  3. t>>?  @0 tyy8=e 8  8  t~~6c 6 6F 6  6 t''(I& I )I t33;;<
 	**&\* c* 	* =*  d22 3 d44 5 d44 5 d== > d== > d== > d== > dDD E dDD E tBB C tBB C tBB C tBB C tJJ K tJJ K tJJ K tJJ K t@@ A tAA B tAA B tAA B tyy  " t||./    t||./   , 	tyy)!$,,/!$,,/
    BNJ r1   