U
    Hra:Q                     @   sj   d Z ddlZddlZddlZddlmZ ddlZddlm	Z	 G dd dZ
G dd dZG d	d
 d
ZdS )a  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product)pairwisec                C   @   s  e Zd ZdZdZdZdZdZdddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@h;ZeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeedABZ	dBdBdCdDdEdFdDdGZ
edHeej dIZdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZgZd[d\ Zdfd^d_ZdgdadbZdcdd ZdeS )hVaderConstantsz8
    A class to keep the Vader lists and constants.
    gn?gnҿg~jt?gGzZaintZarentZcannotZcantZcouldntZdarentZdidntZdoesntzain'tzaren'tzcan'tzcouldn'tzdaren'tzdidn'tzdoesn'tZdontZhadntZhasntZhaventZisntZmightntZmustntZneitherzdon'tzhadn'tzhasn'tzhaven'tzisn'tzmightn'tzmustn'tZneedntzneedn'tnevernoneZnopeZnornotZnothingZnowhereZoughtntZshantZshouldntZuhuhZwasntZwerentzoughtn'tzshan'tz	shouldn'tzuh-uhzwasn'tzweren'tZwithoutZwontZwouldntzwon'tzwouldn'tZrarelyZseldomZdespite)BZ
absolutelyZ	amazinglyZawfullyZ
completelyZconsiderablyZ	decidedlyZdeeplyZeffingZ
enormouslyZentirelyZ
especiallyZexceptionallyZ	extremelyZ
fabulouslyZflippingZflippinZfrickingZfrickinZfriggingZfrigginZfullyZfuckingZgreatlyZhellaZhighlyZhugelyZ
incrediblyZ	intenselyZmajorlymoreZmostZparticularlyZpurelyZquiteZreallyZ
remarkablysoZsubstantiallyZ
thoroughlyZtotallyZtremendouslyZuberZunbelievablyZ	unusuallyZutterlyveryZalmostZbarelyZhardlyzjust enoughzkind ofZkindaZkindofzkind-ofZlesslittleZ
marginallyZoccasionallyZpartlyZscarcelyZslightlyZsomewhatzsort ofZsortaZsortofzsort-of         ?   g      )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouth[].!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?c                 C   s   d S N )selfr   r   ]/var/www/ParaphraseApp/ParaphraseApp/venv/lib/python3.8/site-packages/nltk/sentiment/vader.py__init__   s    zVaderConstants.__init__Tc                    sn   | j  t fdd|D r dS |r:tdd |D r:dS t|D ]&\}}| dkrB| dkrB dS qBdS )z<
        Determine if input contains negation words
        c                 3   s   | ]}|   kV  qd S r   lower.0wordZ	neg_wordsr   r   	<genexpr>   s     z)VaderConstants.negated.<locals>.<genexpr>Tc                 s   s   | ]}d |  kV  qdS )zn'tNr    r"   r   r   r   r&      s     leastatF)NEGATEanyr   r!   )r   Zinput_wordsZ
include_ntfirstsecondr   r%   r   negated   s    zVaderConstants.negated   c                 C   s   |t || |  }|S )z|
        Normalize the score to be between -1 and 1 using an alpha that
        approximates the max expected value
        )mathsqrt)r   ZscorealphaZ
norm_scorer   r   r   	normalize   s    zVaderConstants.normalizec                 C   s^   d}|  }|| jkrZ| j| }|dk r0|d9 }| rZ|rZ|dkrP|| j7 }n
|| j8 }|S )zh
        Check if the preceding words increase, decrease, or negate/nullify the
        valence
                r   )r!   BOOSTER_DICTisupperC_INCR)r   r$   valenceis_cap_diffZscalarZ
word_lowerr   r   r   scalar_inc_dec   s    


zVaderConstants.scalar_inc_decN)T)r.   )__name__
__module____qualname____doc__ZB_INCRB_DECRr7   N_SCALARr)   r5   SPECIAL_CASE_IDIOMSrecompileescapestringpunctuationREGEX_REMOVE_PUNCTUATION	PUNC_LISTr   r-   r2   r:   r   r   r   r   r   !   sF  BG

r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )	SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c                 C   sF   t |tst|d}|| _|| _|| _|  | _| | j| _	d S )Nzutf-8)

isinstancestrencodetextrH   rG   _words_and_emoticonswords_and_emoticonsallcap_differentialr9   )r   rM   Z	punc_listZregex_remove_punctuationr   r   r   r     s    

zSentiText.__init__c                 C   sd   | j d| j}| }dd |D }dd t| j|D }dd t|| jD }|}|| |S )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
         c                 S   s   h | ]}t |d kr|qS    len)r#   wr   r   r   	<setcomp>#  s      z-SentiText._words_plus_punc.<locals>.<setcomp>c                 S   s   i | ]}d  ||d qS )rQ   rS   joinr#   pr   r   r   
<dictcomp>%  s      z.SentiText._words_plus_punc.<locals>.<dictcomp>c                 S   s   i | ]}d  ||d qS )rQ   r   rX   rZ   r   r   r   r\   &  s      )rG   subrM   splitr   rH   update)r   Zno_punc_textZ
words_onlyZpunc_beforeZ
punc_afterwords_punc_dictr   r   r   _words_plus_punc  s    
zSentiText._words_plus_puncc                 C   sJ   | j  }|  }dd |D }t|D ]\}}||kr(|| ||< q(|S )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        c                 S   s   g | ]}t |d kr|qS rR   rT   )r#   wer   r   r   
<listcomp>3  s      z2SentiText._words_and_emoticons.<locals>.<listcomp>)rM   r^   ra   	enumerate)r   Zwesr`   irb   r   r   r   rN   +  s    
zSentiText._words_and_emoticonsc                 C   sR   d}d}|D ]}|  r|d7 }qt|| }d|  k rFt|k rNn nd}|S )z
        Check whether just some words in the input are ALL CAPS

        :param list words: The words to inspect
        :returns: `True` if some but not all items in `words` are ALL CAPS
        Fr   rS   T)r6   rU   )r   wordsZis_differentZallcap_wordsr$   Zcap_differentialr   r   r   rP   9  s    
zSentiText.allcap_differentialN)r;   r<   r=   r>   r   ra   rN   rP   r   r   r   r   rI     s
   rI   c                   @   sz   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    ;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txtc                 C   s$   t j|| _|  | _t | _d S r   )nltkdataloadlexicon_filemake_lex_dictlexiconr   	constants)r   rl   r   r   r   r   P  s    
z#SentimentIntensityAnalyzer.__init__c                 C   s@   i }| j dD ]*}| ddd \}}t|||< q|S )z6
        Convert lexicon file to a dictionary
        
	r   r   )rl   r^   stripfloat)r   Zlex_dictliner$   Zmeasurer   r   r   rm   X  s
    z(SentimentIntensityAnalyzer.make_lex_dictc                 C   s   t || jj| jj}g }|j}|D ]p}d}||}|t|d k rd| dkrd||d   dkst| | jjkr|	| q"| 
|||||}q"| ||}| ||S )z
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.
        r   rS   kindZof)rI   ro   rH   rG   rO   indexrU   r!   r5   appendsentiment_valence
_but_checkscore_valence)r   rM   	sentitext
sentimentsrO   itemr8   re   r   r   r   polarity_scoresb  s.      


z*SentimentIntensityAnalyzer.polarity_scoresc                 C   s   |j }|j}| }|| jkr| j| }| rX|rX|dkrL|| jj7 }n|| jj8 }tddD ]}	||	krb|||	d    | jkrb| j|||	d   ||}
|	dkr|
dkr|
d }
|	dkr|
dkr|
d }
||
 }| 	|||	|}|	dkrb| 
|||}qb| |||}|| |S )Nr   r   rS   gffffff?r   g?)r9   rO   r!   rn   r6   ro   r7   ranger:   _never_check_idioms_check_least_checkrw   )r   r8   r{   r}   re   r|   r9   rO   Zitem_lowercasestart_isr   r   r   rx     sJ    
     	
z,SentimentIntensityAnalyzer.sentiment_valencec                 C   s   |dkrh||d    | jkrh||d    dkrh||d    dkr||d    dkr|| jj }n>|dkr||d    | jkr||d    dkr|| jj }|S )NrS   r'   r   r(   r
   r   )r!   rn   ro   r@   )r   r8   rO   re   r   r   r   r     s&    z'SentimentIntensityAnalyzer._least_checkc                 C   sr   dd |D }dht |@ }|rn|tt|}t|D ]2\}}||k rX|d ||< q:||kr:|d ||< q:|S )Nc                 S   s   g | ]}|  qS r   r    )r#   Zw_er   r   r   rc     s     z9SentimentIntensityAnalyzer._but_check.<locals>.<listcomp>butg      ?r   )setrv   nextiterrd   )r   rO   r|   r   ZbiZsidxZ	sentimentr   r   r   ry     s    z%SentimentIntensityAnalyzer._but_checkc                 C   s  ||d   d||  }d ||d  ||d  || }||d   d||d   }d ||d  ||d  ||d  }d ||d  ||d  }|||||g}	|	D ] }
|
| jjkr| jj|
 } qqt|d |kr||  d||d   }|| jjkr| jj| }t|d |d krhd || ||d  ||d  }|| jjkrh| jj| }|| jjks|| jjkr|| jj }|S )NrS    z{} {} {}r   r   z{} {})formatro   rA   rU   r5   r?   )r   r8   rO   re   ZonezeroZ
twoonezeroZtwooneZthreetwooneZthreetwo	sequencesseqZzerooneZ
zeroonetwor   r   r   r     sN    





 




z(SentimentIntensityAnalyzer._idioms_checkc                 C   s"  |dkr*| j ||d  gr*|| j j }|dkr||d  dkrl||d  dksb||d  dkrl|d }n&| j |||d   gr|| j j }|dkr||d  dkr||d  dks||d  dks||d  dks||d  dkr|d	 }n(| j |||d   gr|| j j }|S )
Nr   rS   r   r   r	   thisr   r   g      ?)ro   r-   r@   )r   r8   rO   r   re   r   r   r   r     s6    


z'SentimentIntensityAnalyzer._never_checkc                 C   s    |  |}| |}|| }|S r   )_amplify_ep_amplify_qm)r   sum_srM   ep_amplifierqm_amplifierpunct_emph_amplifierr   r   r   _punctuation_emphasis  s    

z0SentimentIntensityAnalyzer._punctuation_emphasisc                 C   s"   | d}|dkrd}|d }|S )Nr      g㥛 ?count)r   rM   Zep_countr   r   r   r   r   &  s
    
z&SentimentIntensityAnalyzer._amplify_epc                 C   s0   | d}d}|dkr,|dkr(|d }nd}|S )Nr   r   rS   r   g
ףp=
?gQ?r   )r   rM   Zqm_countr   r   r   r   r   0  s    

z&SentimentIntensityAnalyzer._amplify_qmc                 C   s`   d}d}d}|D ]D}|dkr,|t |d 7 }|dk rD|t |d 7 }|dkr|d7 }q|||fS )Nr3   r   rS   )rs   )r   r|   pos_sumneg_sum	neu_countZsentiment_scorer   r   r   _sift_sentiment_scores=  s    


z1SentimentIntensityAnalyzer._sift_sentiment_scoresc                 C   s   |rt t|}| ||}|dkr.||7 }n|dk r>||8 }| j|}| |\}}}|t|krr||7 }n|t|k r||8 }|t| | }	t||	 }
t||	 }t||	 }nd}d}
d}d}t|dt|dt|
dt|dd}|S )Nr   r3   r   r   )negneuposcompound)	rs   sumr   ro   r2   r   r/   fabsround)r   r|   rM   r   r   r   r   r   r   totalr   r   r   Zsentiment_dictr   r   r   rz   O  s6    

z(SentimentIntensityAnalyzer.score_valenceN)rh   )r;   r<   r=   r>   r   rm   r~   rx   r   ry   r   r   r   r   r   r   rz   r   r   r   r   rg   K  s    

21
rg   )r>   r/   rB   rE   	itertoolsr   Z	nltk.datari   Z	nltk.utilr   r   rI   rg   r   r   r   r   <module>   s    gD