
from dataclasses import dataclass
from typing import List, Optional, Union

import torch
import torch.utils.checkpoint

from ...cache_utils import Cache
from ...modeling_outputs import ImageClassifierOutputWithNoAttention
from ...modeling_utils import PreTrainedModel
from ...utils import add_start_docstrings_to_model_forward, logging
from ...utils.deprecation import deprecate_kwarg
from ..auto import AutoModelForImageTextToText
from .configuration_shieldgemma2 import ShieldGemma2Config


_CHECKPOINT_FOR_DOC = "google/shieldgemma-2-4b-it"
_CONFIG_FOR_DOC = "ShieldGemma2Config"

logger = logging.get_logger(__name__)

SHIELDGEMMA2_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
            it.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
            `past_key_values`).

            If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
            and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
            information on the default strategy.

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.n_positions - 1]`.

            [What are position IDs?](../glossary#position-ids)
        past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*):
            Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
            blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values`
            returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.

            Two formats are allowed:
            - a [`~cache_utils.Cache`] instance, see our
            [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache);
            - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
            shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy
            cache format.

            The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the
            legacy cache format will be returned.

            If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
            have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
            of shape `(batch_size, sequence_length)`.
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        use_cache (`bool`, *optional*):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
            `past_key_values`).
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
        cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
            Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`,
            this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
            the complete sequence length.
"""


@dataclass
class ShieldGemma2ImageClassifierOutputWithNoAttention(ImageClassifierOutputWithNoAttention):
    """ShieldGemma2 classifies images as violative or not relative to a specific policy.

    Args:
        probabilities (`torch.Tensor` of shape `(batch_size, 2)`, *optional*):
            The probabilities of predicting the `Yes` and `No` tokens, as described in
            [`ShieldGemma2ForImageClassification.forward`].
    """

    probabilities: torch.Tensor = None
    e Zd ZeZdef fdZd Zd Zd Zd Z	d Z
d Zd	 Z ed
dd       ee      	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddej"                  dej$                  deej(                     deej"                     deeeej$                     ef      deej"                     deej"                     deej$                     deej"                     dee   dee   dee   dee   deeej(                  f   defd              Z xZS )"ShieldGemma2ForImageClassificationconfigc                     t         |   |       t        |dd      | _        t        |dd      | _        t        j                  |      | _        y )N)r!   yes_token_indexi *  no_token_indexi  )super__init__getattrr#   r$   r   from_configmodel)selfr!   	__class__s     r   r&   z+ShieldGemma2ForImageClassification.__init__~   sI    '&v/@&I%f.>E0<<FK
r   c                 J    | j                   j                  j                         S N)r)   language_modelget_input_embeddingsr*   s    r   r/   z7ShieldGemma2ForImageClassification.get_input_embeddings   s    zz((==??r   c                 N    | j                   j                  j                  |       y r-   )r)   r.   set_input_embeddings)r*   values     r   r2   z7ShieldGemma2ForImageClassification.set_input_embeddings   s    

!!66u=r   c                 J    | j                   j                  j                         S r-   )r)   r.   get_output_embeddingsr0   s    r   r5   z8ShieldGemma2ForImageClassification.get_output_embeddings   s    zz((>>@@r   c                 N    | j                   j                  j                  |       y r-   )r)   r.   set_output_embeddings)r*   new_embeddingss     r   r7   z8ShieldGemma2ForImageClassification.set_output_embeddings   s    

!!77Gr   c                 N    | j                   j                  j                  |       y r-   )r)   r.   set_decoder)r*   decoders     r   r:   z.ShieldGemma2ForImageClassification.set_decoder   s    

!!--g6r   c                 J    | j                   j                  j                         S r-   )r)   r.   get_decoderr0   s    r   r=   z.ShieldGemma2ForImageClassification.get_decoder       zz((4466r   c                 J    | j                   j                  j                         S r-   )r)   r.   tie_weightsr0   s    r   r@   z.ShieldGemma2ForImageClassification.tie_weights   r>   r   num_logits_to_keepz4.50logits_to_keep)versionnew_name	input_idspixel_valuesattention_maskposition_idspast_key_valuestoken_type_idscache_positioninputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictreturnc                      | j                   d|||||||||	|
||||d|}|j                  }|ddd| j                  | j                  gf   }t	        j
                  |d      }t        ||      S )a  Predicts the binary probability that the image violates the specified policy.

        Returns:
            A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
            associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
            following properties.

                *   `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
                    the logits for the `No` token.
                *   `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
                    The first position along dim=1 is the probability of predicting the `Yes` token and the second position
                    along dim=1 is the probability of predicting the `No` token.

            ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
            policy as described. If you are only interested in the violative condition, use
            `violated = outputs.probabilities[:, 0]` to extract that slice from the output tensors.

            When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
            and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
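
        Example (a minimal usage sketch rather than a canonical recipe; the checkpoint id, the sample image URL, and
        the reliance on the processor's built-in default policies are illustrative assumptions):

        ```python
        >>> import torch
        >>> import requests
        >>> from PIL import Image
        >>> from transformers import AutoProcessor, ShieldGemma2ForImageClassification

        >>> model_id = "google/shieldgemma-2-4b-it"
        >>> processor = AutoProcessor.from_pretrained(model_id)
        >>> model = ShieldGemma2ForImageClassification.from_pretrained(model_id)

        >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> inputs = processor(images=[image], return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)

        >>> # One row of (`Yes`, `No`) probabilities per image/policy pair.
        >>> print(outputs.probabilities)
        ```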
        """
        outputs = self.model(
            input_ids=input_ids,
            pixel_values=pixel_values,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            token_type_ids=token_type_ids,
            cache_position=cache_position,
            inputs_embeds=inputs_embeds,
            labels=labels,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            **lm_kwargs,
        )
        logits = outputs.logits
        # Keep only the final-position logits for the `Yes` and `No` tokens, then normalize them into a
        # two-way probability for each image/policy pair.
        selected_logits = logits[:, -1, [self.yes_token_index, self.no_token_index]]
        probabilities = torch.softmax(selected_logits, dim=-1)
        return ShieldGemma2ImageClassifierOutputWithNoAttention(
            logits=selected_logits,
            probabilities=probabilities,
        )


__all__ = ["ShieldGemma2ForImageClassification"]