
    Pgb                        d Z ddlZddlmZmZmZmZmZmZ ddl	Z	ddl	m
Z
 ddlmZmZmZ ddlmZmZmZ ddlmZmZmZ dd	lmZmZ dd
lmZmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m%Z%m&Z& ddlm'Z' ddl(m)Z) g dZ*ee	jV                     Z,eeee   e	jV                  f   Z-dee	jV                  ej\                  f   de	jV                  de/dee-e-f   fdZ0deee	jV                     eej\                     f   dee,   de/dee,df   fdZ1dee   de2dee,df   fdZ3	 	 	 	 	 d0dedee   dee/   de/dee   d ee   ddfd!Z4	 	 	 	 	 	 	 d1ded ed"ee   dee/   de/d#e/d$ee/   de/d%e/dee	jV                  df   fd&Z5d' Z6d( Z(e)e(_)        e	jn                  jq                         s e9d)      dd*l:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ dd+lRmSZSmTZTmUZU dd,lmVZV d- ZWe	jn                  j                  ZYe	jn                  j                  eYd.       e	jn                  j                  Z\e	jn                  j                  e\d/       y)2a  
``torch.autograd`` provides classes and functions implementing automatic differentiation of arbitrary scalar valued functions.

It requires minimal changes to the existing code - you only need to declare :class:`Tensor` s
for which gradients should be computed with the ``requires_grad=True`` keyword.
As of now, we only support autograd for floating point :class:`Tensor` types (
half, float, double and bfloat16) and complex :class:`Tensor` types (cfloat, cdouble).
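
A minimal sketch of the intended workflow::

    >>> import torch
    >>> x = torch.tensor([1.0, 2.0], requires_grad=True)
    >>> loss = (x * x).sum()
    >>> loss.backward()  # computes d(loss)/dx and accumulates it into x.grad
    >>> x.grad
    tensor([2., 4.])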
    N)castListOptionalSequenceTupleUnion)_vmap_internals)handle_torch_functionhas_torch_functionis_tensor_like)_size_TensorOrTensors_TensorOrTensorsOrGradEdge   )
forward_ad
functionalgraph)detect_anomalyset_detect_anomaly)FunctionNestedIOFunction)_force_original_view_tracking _unsafe_preserve_version_counterenable_gradinference_modeno_gradset_grad_enabledset_multithreading_enabled)	gradcheckgradgradcheck_engine_run_backward)Variable)r#   r   backward	grad_moder   r   r   gradr   r    r   r   r   r   r   variableoutputr&   is_grads_batchedreturnc                    ddl m} t        | t        j                        r[|rt        d      | j                  j                  | j                     }t        j                  |j                        |j                  fS | j                  r=t        | |      s1|rt        d      | j                         }|j                         }||fS | j                  }|s|j                  n|j                  dd  }||fS )Nr   NestedTensorz1Batched grads are not supported with GradientEdgez3Batched grads are not supported with Nested Tensor.r   )$torch.nested._internal.nested_tensorr-   
isinstancer   GradientEdgeRuntimeErrornode_input_metadata	output_nrtorchSizeshape	is_nested_nested_tensor_size)	r(   r&   r)   r-   out_metadata	out_shape
grad_shapereg_out_shapereg_grad_shapes	            ^/var/www/html/suriana-translation/venv/lib/python3.12/site-packages/torch/autograd/__init__.py_calculate_shaper@   ;   s     B&%,,-RSS{{2263C3CDzz,,,-tzz99
6< @TUU..0	--/
*$$LLM'7TZZTZZ^N.((    outputsgrads.c           	      

   g }t        | |      D ]  \  }}t        t        t        j                  t
        j                  f   |      }d }d }t        |t
        j                        r|j                  j                  |j                     }t        j                  |j                        }|j                  }	|j                  }|j                  }
|j                   rt#        d      d}nYddlm} t        |t        j                        sJ |j                  }	|j(                  }
|
xr t        ||       }|s|j                  }t        |t        j                        rddlm}m} |s|n|d   }|r3t        |t        j                        sJ t        j0                  ||      }n!|J  | |||j3                                     }|st        t        t        j                  t
        j                  f   |      }t5        |||      \  }}|ret#        dt7        |j9                  |            z   dz   t7        |      z   dz   t7        | j9                  |            z   dz   t7        |      z   d	z         t#        d
t7        |j9                  |            z   dz   t7        |      z   dz   t7        | j9                  |            z   dz   t7        |      z   dz         |	j:                  |j                  j:                  k7  rot#        dt7        |j9                  |            z   dz   t7        |j                        z   dz   t7        | j9                  |            z   dz   t7        |	      z   dz         |j=                  |       f|`t        |t
        j                        s|j>                  r&t        |t
        j                        r|J tA        d |D              }n/t        |t        j                        sJ |jC                         dk(  }|st#        d      |	jD                  sd|	 }t#        |      t        |t
        j                        r1|J |J |j=                  t        jF                  ||	|             dt        |t        j                        sJ |j=                  t        jH                  |t        jJ                               |j=                  d        tM        dtO        |      jP                  z          tS        |      S )Nz4C++ NestedTensor are not supported with GradientEdgeFr   r,   )expect_truesym_eqzIf `is_grads_batched=True`, we interpret the first dimension of each grad_output as the batch dimension. The sizes of the remaining dimensions are expected to match the shape of corresponding output, but a mismatch was detected: grad_output[z] has a shape of z and output[z_. If you only want some tensors in `grad_output` to be considered batched, consider using vmap.zMismatch in shape: grad_output[.zuFor complex Tensors, both grad_output and output are required to have the same dtype. Mismatch in dtype: grad_output[z] has a dtype of c              3   &   K   | ]	  }|d k(    yw)r   N ).0os     r?   	<genexpr>z_make_grads.<locals>.<genexpr>   s     (BAa(Bs   r   z6grad can be implicitly created only for scalar outputszDgrad can be implicitly created only for real scalar outputs but got )dtypedevice)memory_formatz1gradients can be either Tensors or None, but got )*zipr   r   r5   Tensorr   r0   r/   r2   r3   r4   r6   r7   rM   rN   is_nested_tensoris_cpp_nested_tensorr1   r.   r-   r8   %torch.fx.experimental.symbolic_shapesrE   rF   is_same_sizesizer@   strindex
is_complexappendrequires_gradallnumelis_floating_pointones	ones_likepreserve_format	TypeErrortype__name__tuple)rB   rC   r)   	new_gradsoutr&   out_size
out_devicer:   	out_dtypeout_is_nestedout_is_cpp_nestedr-   rE   rF   
first_gradshape_matchesr;   r<   out_numel_is_1msgs                        r?   _make_gradsrq   X   s   
 (*I%( F	T5u'9'99:C@
c5--.8833CMMBLzz,"4"45H$**I%,,J(99M00"J  !& Jc5<<000		IMMM - Sjl6S2S$99dELL)Q%547J
 !!#u||444 % 2 23
 C
  +++ +F8Z__=N,O P 5u/A/A!ABCH(8%5)%	: $&5
 ekk$/01 .. j/* )) gmmC01	2 .
. i.)88 $ '9ekk$/01-. j/* )	)
 gmmC012 .. i.) 
 
 ##tzz'<'<<"7 %++d+,- *	*
 $**o& %% '--,-. ** )n	% 
  T"\#u112c6G6Gc5#5#56#///%((B(B%BN%c5<<888%(YY[A%5N%&P  !22$$-;0  's++c5#5#56#///%111$$

$"+#- &c5<<888$$5;P;PQ   &Ct*%%& GFN rA   tensorslengthc                 `    | d|z  S t        | t        j                        r| fS t        |       S )NN)r/   r5   rQ   re   )rr   rs   s     r?   _tensor_or_tensors_to_tuplerv      s4     '5<<(z>rA   grad_tensorsretain_graphcreate_graphgrad_variablesinputsc           	      "   t         j                  j                         rt        d      |,t	        j
                  dt        d       ||}nt        d      |t        |      dk(  rt        d      t        | t         j                        r| fn
t        |       } t        |t         j                  t        j                  f      r|fn|t        |      nd	}t        |t        |             }t        | |d
      }||}t        | ||||dd       y)a  Compute the sum of gradients of given tensors with respect to graph leaves.

    The graph is differentiated using the chain rule. If any of ``tensors``
    are non-scalar (i.e. their data has more than one element) and require
    gradient, then the Jacobian-vector product would be computed, in this
    case the function additionally requires specifying ``grad_tensors``.
    It should be a sequence of matching length, that contains the "vector"
    in the Jacobian-vector product, usually the gradient of the differentiated
    function w.r.t. corresponding tensors (``None`` is an acceptable value for
    all tensors that don't need gradient tensors).
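
    For a non-scalar output this means passing the "vector" explicitly; a
    minimal illustrative sketch::

        >>> x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
        >>> y = x * 2  # non-scalar output, so a "vector" must be supplied
        >>> v = torch.tensor([1.0, 1.0, 1.0])
        >>> torch.autograd.backward([y], grad_tensors=[v])
        >>> x.grad  # the vector-Jacobian product, here 2 * v
        tensor([2., 2., 2.])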

    This function accumulates gradients in the leaves - you might need to zero
    ``.grad`` attributes or set them to ``None`` before calling it.
    See :ref:`Default gradient layouts<default-grad-layouts>`
    for details on the memory layout of accumulated gradients.

    .. note::
        Using this method with ``create_graph=True`` will create a reference cycle
        between the parameter and its gradient which can cause a memory leak.
        We recommend using ``autograd.grad`` when creating the graph to avoid this.
        If you have to use this function, make sure to reset the ``.grad`` fields of your
        parameters to ``None`` after use to break the cycle and avoid the leak.

    .. note::

        If you run any forward ops, create ``grad_tensors``, and/or call ``backward``
        in a user-specified CUDA stream context, see
        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.

    .. note::

        When ``inputs`` are provided and a given input is not a leaf,
        the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients).
        It is an implementation detail on which the user should not rely.
        See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.

    Args:
        tensors (Sequence[Tensor] or Tensor): Tensors of which the derivative will be
            computed.
        grad_tensors (Sequence[Tensor or None] or Tensor, optional): The "vector" in
            the Jacobian-vector product, usually gradients w.r.t. each element of
            corresponding tensors. None values can be specified for scalar Tensors or
            ones that don't require grad. If a None value would be acceptable for all
            grad_tensors, then this argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Defaults to ``False``.
        inputs (Sequence[Tensor] or Tensor or Sequence[GradientEdge], optional): Inputs w.r.t. which the gradient
            will be accumulated into ``.grad``. All other Tensors will be ignored. If
            not provided, the gradient is accumulated into all the leaf Tensors that
            were used to compute the :attr:`tensors`.
    """
    if torch._C._are_functorch_transforms_active():
        raise RuntimeError(
            "backward() called inside a functorch transform. This is not "
            "supported, please use functorch.grad or functorch.vjp instead "
            "or call backward() outside of functorch transforms."
        )

    if grad_variables is not None:
        warnings.warn(
            "`grad_variables` is deprecated. Use `grad_tensors` instead.",
            FutureWarning,
            stacklevel=2,
        )
        if grad_tensors is None:
            grad_tensors = grad_variables
        else:
            raise RuntimeError(
                "`grad_tensors` and `grad_variables` (deprecated) "
                "arguments both passed to `backward()`. Please only "
                "use `grad_tensors`."
            )
    if inputs is not None and len(inputs) == 0:
        raise RuntimeError("`inputs` argument to `backward()` cannot be empty.")

    tensors = (tensors,) if isinstance(tensors, torch.Tensor) else tuple(tensors)
    inputs = (
        (inputs,)
        if isinstance(inputs, (torch.Tensor, graph.GradientEdge))
        else tuple(inputs)
        if inputs is not None
        else tuple()
    )

    grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
    grad_tensors_ = _make_grads(tensors, grad_tensors_, is_grads_batched=False)
    if retain_graph is None:
        retain_graph = create_graph

    _engine_run_backward(
        tensors,
        grad_tensors_,
        retain_graph,
        create_graph,
        inputs,
        allow_unreachable=True,
        accumulate_grad=True,
    )


def grad(
    outputs: _TensorOrTensorsOrGradEdge,
    inputs: _TensorOrTensorsOrGradEdge,
    grad_outputs: Optional[_TensorOrTensors] = None,
    retain_graph: Optional[bool] = None,
    create_graph: bool = False,
    only_inputs: bool = True,
    allow_unused: Optional[bool] = None,
    is_grads_batched: bool = False,
    materialize_grads: bool = False,
) -> Tuple[torch.Tensor, ...]:
    r"""Compute and return the sum of gradients of outputs with respect to the inputs.

    ``grad_outputs`` should be a sequence of length matching ``output``
    containing the "vector" in vector-Jacobian product, usually the pre-computed
    gradients w.r.t. each of the outputs. If an output doesn't require_grad,
    then the gradient can be ``None``.

    .. note::

        If you run any forward ops, create ``grad_outputs``, and/or call ``grad``
        in a user-specified CUDA stream context, see
        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.

    .. note::

        ``only_inputs`` argument is deprecated and is ignored now (defaults to ``True``).
        To accumulate gradient for other parts of the graph, please use
        ``torch.autograd.backward``.

    Args:
        outputs (sequence of Tensor or GradientEdge): outputs of the differentiated function.
        inputs (sequence of Tensor or GradientEdge): Inputs w.r.t. which the gradient will be
            returned (and not accumulated into ``.grad``).
        grad_outputs (sequence of Tensor): The "vector" in the vector-Jacobian product.
            Usually gradients w.r.t. each output. None values can be specified for scalar
            Tensors or ones that don't require grad. If a None value would be acceptable
            for all grad_tensors, then this argument is optional. Default: None.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Default: ``False``.
        allow_unused (Optional[bool], optional): If ``False``, specifying inputs
            that were not used when computing outputs (and therefore their grad is
            always zero) is an error. Defaults to the value of ``materialize_grads``.
        is_grads_batched (bool, optional): If ``True``, the first dimension of each
            tensor in ``grad_outputs`` will be interpreted as the batch dimension.
            Instead of computing a single vector-Jacobian product, we compute a
            batch of vector-Jacobian products for each "vector" in the batch.
            We use the vmap prototype feature as the backend to vectorize calls
            to the autograd engine so that this computation can be performed in a
            single call. This should lead to performance improvements when compared
            to manually looping and performing backward multiple times. Note that
            due to this feature being experimental, there may be performance
            cliffs. Please use ``torch._C._debug_only_display_vmap_fallback_warnings(True)``
            to show any performance warnings and file an issue on github if warnings exist
            for your use case. Defaults to ``False``.
        materialize_grads (bool, optional): If ``True``, set the gradient for unused inputs
            to zero instead of None. This is useful when computing higher-order derivatives.
            If ``materialize_grads`` is ``True`` and ``allow_unused`` is ``False``, an error
            will be raised. Defaults to ``False``.
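
    Example of a basic vector-Jacobian product, a second-order gradient, and a
    batched call (an illustrative sketch)::

        >>> x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
        >>> y = x ** 2
        >>> v = torch.ones(3)
        >>> # Gradients are returned rather than accumulated into ``x.grad``.
        >>> (gx,) = torch.autograd.grad(y, x, grad_outputs=v, create_graph=True)
        >>> torch.equal(gx, 2 * x)
        True
        >>> # ``create_graph=True`` lets us differentiate through ``gx`` again.
        >>> (ggx,) = torch.autograd.grad(gx.sum(), x)
        >>> ggx
        tensor([2., 2., 2.])
        >>> # With ``is_grads_batched=True`` one call propagates a batch of "vectors".
        >>> vs = torch.eye(3)
        >>> (jac,) = torch.autograd.grad(y, x, grad_outputs=vs, is_grads_batched=True)
        >>> torch.equal(jac, torch.diag(2 * x))
        True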

    """
    if materialize_grads and allow_unused is False:
        raise ValueError(
            "Expected allow_unused to be True or not passed when "
            "materialize_grads=True, but got: allow_unused=False."
        )
    if allow_unused is None:
        allow_unused = materialize_grads

    if is_tensor_like(outputs) or isinstance(outputs, graph.GradientEdge):
        outputs = cast(
            Union[Sequence[torch.Tensor], Sequence[graph.GradientEdge]], (outputs,)
        )
    else:
        outputs = tuple(outputs)
    if is_tensor_like(inputs) or isinstance(inputs, graph.GradientEdge):
        inputs = cast(_TensorOrTensorsOrGradEdge, (inputs,))
    else:
        inputs = tuple(inputs)

    t_outputs = tuple(i for i in outputs if is_tensor_like(i))
    t_inputs = tuple(i for i in inputs if is_tensor_like(i))
    overridable_args = t_outputs + t_inputs
    if has_torch_function(overridable_args):
        return handle_torch_function(
            grad,
            overridable_args,
            outputs,
            inputs,
            grad_outputs=grad_outputs,
            retain_graph=retain_graph,
            create_graph=create_graph,
            only_inputs=only_inputs,
            allow_unused=allow_unused,
            is_grads_batched=is_grads_batched,
            materialize_grads=materialize_grads,
        )

    if not only_inputs:
        warnings.warn(
            "only_inputs argument is deprecated and is ignored now "
            "(defaults to True). To accumulate gradient for other "
            "parts of the graph, please use torch.autograd.backward.",
            FutureWarning,
            stacklevel=2,
        )

    grad_outputs_ = _tensor_or_tensors_to_tuple(grad_outputs, len(t_outputs))
    grad_outputs_ = _make_grads(
        t_outputs, grad_outputs_, is_grads_batched=is_grads_batched
    )

    if retain_graph is None:
        retain_graph = create_graph

    if is_grads_batched:
        # Vectorize one vector-Jacobian product per "vector" in the batch by
        # mapping the engine call over the first dimension of grad_outputs.
        def vjp(gO):
            return _engine_run_backward(
                outputs,
                gO,
                retain_graph,
                create_graph,
                inputs,
                allow_unused,
                accumulate_grad=False,
            )

        result = _vmap_internals._vmap(vjp, 0, 0, allow_none_pass_through=True)(
            grad_outputs_
        )
    else:
        result = _engine_run_backward(
            outputs,
            grad_outputs_,
            retain_graph,
            create_graph,
            inputs,
            allow_unused,
            accumulate_grad=False,
        )
    if materialize_grads:
        if any(
            result[i] is None and not is_tensor_like(inputs[i])
            for i in range(len(inputs))
        ):
            raise RuntimeError(
                "materialize_grads cannot be used when the given input is a GradientEdge"
            )
        result = tuple(
            output
            if output is not None
            else torch.zeros_like(input, requires_grad=True)
            for (output, input) in zip(result, inputs)
        )
    return result


# Used by torch.utils.checkpoint: checkpointing is only supported when the
# engine is driven by torch.autograd.backward() without an `inputs` argument.
def _is_checkpoint_valid():
    return Variable._execution_engine.is_checkpoint_valid()


def variable(*args, **kwargs):
    raise RuntimeError(
        "torch.autograd.variable(...) is deprecated, use torch.tensor(...) instead"
    )


# The deprecated `variable` function above shadows the `torch.autograd.variable`
# submodule name, so expose the class as an attribute to keep
# `torch.autograd.variable.Variable` resolvable.
variable.Variable = Variable  # type: ignore[attr-defined]

if not torch._C._autograd_init():
    raise RuntimeError("autograd initialization failed")

# Import all native methods/classes.
from torch._C._autograd import (
    _add_metadata_json,
    _disable_profiler,
    _disable_profiler_legacy,
    _enable_profiler,
    _enable_profiler_legacy,
    _enable_record_function,
    _get_sequence_nr,
    _kineto_step,
    _KinetoEvent,
    _pop_saved_tensors_default_hooks,
    _prepare_profiler,
    _profiler_enabled,
    _ProfilerResult,
    _push_saved_tensors_default_hooks,
    _record_function_with_args_enter,
    _record_function_with_args_exit,
    _set_empty_test_observer,
    _supported_activities,
    _toggle_collection_dynamic,
    DeviceType,
    kineto_available,
    ProfilerEvent,
    SavedTensor,
)
from torch._C._profiler import ProfilerActivity, ProfilerConfig, ProfilerState

from . import profiler


def _register_py_tensor_class_for_device(device, cls):
    if not isinstance(cls, type):
        raise RuntimeError("cls isn't a typeinfo object")
    torch._C._register_py_class_for_device(device, cls)


is_multithreading_enabled = torch._C._is_multithreading_enabled
torch._C._add_docstr(
    is_multithreading_enabled, "Returns True if multithreading is currently enabled."
)

is_view_replay_enabled = torch._C._is_view_replay_enabled
torch._C._add_docstr(
    is_view_replay_enabled, "Returns True if view-replay is currently enabled."
)