# Source: Hugging Face Space "peccavi/ai-text-watermarking-model"
# (Space status at capture time: Sleeping).
# File size: 27,277 bytes; revision 060ac52.

import random
import torch
from transformers import BertTokenizer, BertForMaskedLM
from nltk.corpus import stopwords
import nltk
from transformers import RobertaTokenizer, RobertaForMaskedLM


# Ensure the NLTK stopword corpus is available at import time:
# nltk.data.find raises LookupError when 'corpora/stopwords' cannot be
# located, in which case the corpus is fetched with nltk.download.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

class MaskingProcessor:
    # def __init__(self, tokenizer, model):
    def __init__(self, model_name="bert-large-cased-whole-word-masking", tokenizer=None, model=None):
        """
        Set up the tokenizer, the masked-language model and the stopword set.

        Args:
            model_name (str): Name of the BERT checkpoint passed to
                ``from_pretrained`` for whichever of ``tokenizer`` / ``model``
                is not supplied. Defaults to the checkpoint that was
                previously hard-coded.
            tokenizer: Optional ready-made tokenizer. When given, no tokenizer
                is loaded from ``model_name``.
            model: Optional ready-made masked-LM model. When given, no model
                is loaded from ``model_name``.
        """
        # Only load from the checkpoint what the caller did not inject, so
        # several processors can share one (large) model instance.
        if tokenizer is None:
            tokenizer = BertTokenizer.from_pretrained(model_name)
        if model is None:
            model = BertForMaskedLM.from_pretrained(model_name)

        self.tokenizer = tokenizer
        self.model = model

        # Stopwords are compared against lowercased words (see remove_stopwords).
        self.stop_words = set(stopwords.words('english'))

    def remove_stopwords(self, words):
        """
        Remove stopwords from the given list of words.

        Args:
            words (list): List of words.

        Returns:
            list: List of non-stop words.
        """
        return [word for word in words if word.lower() not in self.stop_words]

    def adjust_ngram_indices(self, original_words, common_ngrams):
        """
        Adjust indices of common n-grams after removing stopwords.

        Args:
            original_words (list): Original list of words.
            common_ngrams (dict): Common n-grams and their indices.

        Returns:
            dict: Adjusted common n-grams with updated indices.
        """
        non_stop_words = self.remove_stopwords(original_words)
        original_to_non_stop = []
        non_stop_idx = 0

        for original_idx, word in enumerate(original_words):
            if word.lower() not in self.stop_words:
                original_to_non_stop.append((original_idx, non_stop_idx))
                non_stop_idx += 1

        adjusted_ngrams = {}
        for ngram, positions in common_ngrams.items():
            adjusted_positions = []
            for start, end in positions:
                try:
                    new_start = next(non_stop for orig, non_stop in original_to_non_stop if orig == start)
                    new_end = next(non_stop for orig, non_stop in original_to_non_stop if orig == end)
                    adjusted_positions.append((new_start, new_end))
                except StopIteration:
                    continue  # Skip if indices cannot be mapped
            adjusted_ngrams[ngram] = adjusted_positions

        return adjusted_ngrams

    def mask_sentence_random(self, sentence, common_ngrams):
        """
        Mask words in the sentence based on the specified rules after removing stopwords.
        """
        # Split sentence into words
        original_words = sentence.split()
        
        # Handle punctuation at the end
        has_punctuation = False
        punctuation = None
        if original_words and any(original_words[-1].endswith(p) for p in ['.', ',', '!', '?', ';', ':']):
            has_punctuation = True
            punctuation = original_words[-1][-1]
            original_words = original_words[:-1]
        
        print(f' ---- original_words : {original_words} ----- ')
        
        # Process words without punctuation
        non_stop_words = self.remove_stopwords(original_words)
        adjusted_ngrams = self.adjust_ngram_indices(original_words, common_ngrams)
        
        # Rest of the existing function code...
        mask_indices = []
        ngram_positions = [pos for positions in adjusted_ngrams.values() for pos in positions]
        
        if ngram_positions:
            first_ngram_start = ngram_positions[0][0]
            if first_ngram_start > 0:
                mask_index_before_ngram = random.randint(0, first_ngram_start-1)
                mask_indices.append(mask_index_before_ngram)

            # Mask words between common n-grams
            for i in range(len(ngram_positions) - 1):
                end_prev = ngram_positions[i][1]
                start_next = ngram_positions[i + 1][0]
                if start_next > end_prev + 1:
                    mask_index_between_ngrams = random.randint(end_prev + 1, start_next - 1)
                    mask_indices.append(mask_index_between_ngrams)

            # Mask a word after the last common n-gram
            last_ngram_end = ngram_positions[-1][1]
            if last_ngram_end < len(non_stop_words) - 1:
                mask_index_after_ngram = random.randint(last_ngram_end + 1, len(non_stop_words) - 1)
                mask_indices.append(mask_index_after_ngram)

        # Create mapping from non-stop words to original indices
        non_stop_to_original = {}
        non_stop_idx = 0
        for orig_idx, word in enumerate(original_words):
            if word.lower() not in self.stop_words:
                non_stop_to_original[non_stop_idx] = orig_idx
                non_stop_idx += 1

        # Map mask indices and apply masks
        original_mask_indices = [non_stop_to_original[idx] for idx in mask_indices]
        masked_words = original_words.copy()
        for idx in original_mask_indices:
            masked_words[idx] = self.tokenizer.mask_token
            # masked_words[idx] = '<mask>' # for roberta

        # Add back punctuation if it existed
        if has_punctuation:
            masked_words.append(punctuation)

        print(f' ***** masked_words at end  : {masked_words} ***** ')
        print(f' ***** original_mask_indices : {original_mask_indices} ***** ')
        print(f' ***** TESTING : {" ".join(masked_words)} ***** ')

        return " ".join(masked_words), original_mask_indices

    def mask_sentence_pseudorandom(self, sentence, common_ngrams):
        """
        Mask words in the sentence based on the specified rules after removing stopwords.
        """
        # Split sentence into words
        random.seed(3)
        original_words = sentence.split()
        
        # Handle punctuation at the end
        has_punctuation = False
        punctuation = None
        if original_words and any(original_words[-1].endswith(p) for p in ['.', ',', '!', '?', ';', ':']):
            has_punctuation = True
            punctuation = original_words[-1][-1]
            original_words = original_words[:-1]
        
        print(f' ---- original_words : {original_words} ----- ')
        
        # Process words without punctuation
        non_stop_words = self.remove_stopwords(original_words)
        adjusted_ngrams = self.adjust_ngram_indices(original_words, common_ngrams)
        
        # Rest of the existing function code...
        mask_indices = []
        ngram_positions = [pos for positions in adjusted_ngrams.values() for pos in positions]
        
        if ngram_positions:
            first_ngram_start = ngram_positions[0][0]
            if first_ngram_start > 0:
                mask_index_before_ngram = random.randint(0, first_ngram_start-1)
                mask_indices.append(mask_index_before_ngram)

            # Mask words between common n-grams
            for i in range(len(ngram_positions) - 1):
                end_prev = ngram_positions[i][1]
                start_next = ngram_positions[i + 1][0]
                if start_next > end_prev + 1:
                    mask_index_between_ngrams = random.randint(end_prev + 1, start_next - 1)
                    mask_indices.append(mask_index_between_ngrams)

            # Mask a word after the last common n-gram
            last_ngram_end = ngram_positions[-1][1]
            if last_ngram_end < len(non_stop_words) - 1:
                mask_index_after_ngram = random.randint(last_ngram_end + 1, len(non_stop_words) - 1)
                mask_indices.append(mask_index_after_ngram)

        # Create mapping from non-stop words to original indices
        non_stop_to_original = {}
        non_stop_idx = 0
        for orig_idx, word in enumerate(original_words):
            if word.lower() not in self.stop_words:
                non_stop_to_original[non_stop_idx] = orig_idx
                non_stop_idx += 1

        # Map mask indices and apply masks
        original_mask_indices = [non_stop_to_original[idx] for idx in mask_indices]
        masked_words = original_words.copy()
        for idx in original_mask_indices:
            masked_words[idx] = self.tokenizer.mask_token
            # masked_words[idx] = '<mask>' # for roberta

        # Add back punctuation if it existed
        if has_punctuation:
            masked_words.append(punctuation)

        print(f' ***** masked_words at end  : {masked_words} ***** ')
        print(f' ***** original_mask_indices : {original_mask_indices} ***** ')
        print(f' ***** TESTING : {" ".join(masked_words)} ***** ')

        return " ".join(masked_words), original_mask_indices


    def calculate_word_entropy(self, sentence, word_position):
        """
        Calculate entropy for a specific word position in the sentence.
        
        Args:
            sentence (str): The input sentence
            word_position (int): Position of the word to calculate entropy for
            
        Returns:
            float: Entropy value for the word
        """
        words = sentence.split()
        masked_words = words.copy()
        masked_words[word_position] = self.tokenizer.mask_token
        masked_sentence = " ".join(masked_words)
        
        input_ids = self.tokenizer(masked_sentence, return_tensors="pt")["input_ids"]
        mask_token_index = torch.where(input_ids == self.tokenizer.mask_token_id)[1]
        
        with torch.no_grad():
            outputs = self.model(input_ids)
            logits = outputs.logits
        
        # Get probabilities for the masked position
        probs = torch.nn.functional.softmax(logits[0, mask_token_index], dim=-1)
        # Calculate entropy: -sum(p * log(p))
        entropy = -torch.sum(probs * torch.log(probs + 1e-9))
        
        return entropy.item()

    def mask_sentence_entropy(self, sentence, common_ngrams):
        """
        Mask words in the sentence based on entropy, following n-gram positioning rules.
        
        Args:
            sentence (str): Original sentence
            common_ngrams (dict): Common n-grams and their indices
            
        Returns:
            str: Masked sentence
        """
        # Split sentence into words
        original_words = sentence.split()
        
        # Handle punctuation at the end
        has_punctuation = False
        punctuation = None
        if original_words and any(original_words[-1].endswith(p) for p in ['.', ',', '!', '?', ';', ':']):
            has_punctuation = True
            punctuation = original_words[-1][-1]
            original_words = original_words[:-1]
        
        # Process words without punctuation
        non_stop_words = self.remove_stopwords(original_words)
        adjusted_ngrams = self.adjust_ngram_indices(original_words, common_ngrams)
        
        # Create mapping from non-stop words to original indices
        non_stop_to_original = {}
        original_to_non_stop = {}
        non_stop_idx = 0
        for orig_idx, word in enumerate(original_words):
            if word.lower() not in self.stop_words:
                non_stop_to_original[non_stop_idx] = orig_idx
                original_to_non_stop[orig_idx] = non_stop_idx
                non_stop_idx += 1

        ngram_positions = [pos for positions in adjusted_ngrams.values() for pos in positions]
        mask_indices = []

        if ngram_positions:
            # Handle words before first n-gram
            first_ngram_start = ngram_positions[0][0]
            if first_ngram_start > 0:
                candidate_positions = range(0, first_ngram_start)
                entropies = [(pos, self.calculate_word_entropy(sentence, non_stop_to_original[pos]))
                            for pos in candidate_positions]
                mask_indices.append(max(entropies, key=lambda x: x[1])[0])

            # Handle words between n-grams
            for i in range(len(ngram_positions) - 1):
                end_prev = ngram_positions[i][1]
                start_next = ngram_positions[i + 1][0]
                if start_next > end_prev + 1:
                    candidate_positions = range(end_prev + 1, start_next)
                    entropies = [(pos, self.calculate_word_entropy(sentence, non_stop_to_original[pos]))
                                for pos in candidate_positions]
                    mask_indices.append(max(entropies, key=lambda x: x[1])[0])

            # Handle words after last n-gram
            last_ngram_end = ngram_positions[-1][1]
            if last_ngram_end < len(non_stop_words) - 1:
                candidate_positions = range(last_ngram_end + 1, len(non_stop_words))
                entropies = [(pos, self.calculate_word_entropy(sentence, non_stop_to_original[pos]))
                            for pos in candidate_positions]
                mask_indices.append(max(entropies, key=lambda x: x[1])[0])

        # Map mask indices to original sentence positions and apply masks
        original_mask_indices = [non_stop_to_original[idx] for idx in mask_indices]
        masked_words = original_words.copy()
        for idx in original_mask_indices:
            masked_words[idx] = self.tokenizer.mask_token

        # Add back punctuation if it existed
        if has_punctuation:
            masked_words.append(punctuation)

        return " ".join(masked_words), original_mask_indices

    def calculate_mask_logits(self, original_sentence, original_mask_indices):
        """
        Calculate logits for masked tokens in the sentence using BERT.

        Args:
            original_sentence (str): Original sentence without masks
            original_mask_indices (list): List of indices to mask

        Returns:
            dict: Masked token indices and their logits
        """
        print('==========================================================================================================')
        words = original_sentence.split()
        print(f' ##### calculate_mask_logits >> words : {words} ##### ')
        mask_logits = {}

        for idx in original_mask_indices:
            # Create a copy of words and mask the current position
            print(f' ---- idx : {idx} ----- ')
            masked_words = words.copy()
            masked_words[idx] = '[MASK]'
            # masked_words[idx] = '<mask>' # for roberta
            masked_sentence = " ".join(masked_words)
            print(f' ---- masked_sentence : {masked_sentence} ----- ')

            # Calculate logits for the current mask
            input_ids = self.tokenizer(masked_sentence, return_tensors="pt")["input_ids"]
            mask_token_index = torch.where(input_ids == self.tokenizer.mask_token_id)[1]

            with torch.no_grad():
                outputs = self.model(input_ids)
                logits = outputs.logits

            # Extract logits for the masked position
            mask_logits_tensor = logits[0, mask_token_index, :] 

            # Get top logits and corresponding tokens
            top_mask_logits, top_mask_indices = torch.topk(mask_logits_tensor, 100, dim=-1)  # Get more candidates

            # Convert token IDs to words and filter out subword tokens
            top_tokens = []
            top_logits = []
            seen_words = set()  # To keep track of unique words

            for token_id, logit in zip(top_mask_indices[0], top_mask_logits[0]):
                token = self.tokenizer.convert_ids_to_tokens(token_id.item())
                
                # Skip if it's a subword token (starts with ##)
                if token.startswith('##'):
                    continue
                    
                # Convert token to proper word
                word = self.tokenizer.convert_tokens_to_string([token]).strip()
                
                # Only add if it's a new word and not empty
                if word and word not in seen_words:
                    seen_words.add(word)
                    top_tokens.append(word)
                    top_logits.append(logit.item())
                    
                    # Break if we have 50 unique complete words
                    if len(top_tokens) == 50:
                        break

            # print(f' ---- top_tokens : {top_tokens} ----- ')

            # Store results
            mask_logits[idx] = {
                "tokens": top_tokens,
                "logits": top_logits
            }

        return mask_logits

    # def calculate_mask_logits(self, original_sentence, original_mask_indices):
    #     """
    #     Calculate logits for masked tokens in the sentence using BERT.

    #     Args:
    #         original_sentence (str): Original sentence without masks
    #         original_mask_indices (list): List of indices to mask

    #     Returns:
    #         dict: Masked token indices and their logits
    #     """
    #     words = original_sentence.split()
    #     print(f' ##### calculate_mask_logits >> words : {words} ##### ')
    #     mask_logits = {}

    #     for idx in original_mask_indices:
    #         # Create a copy of words and mask the current position
    #         print(f' ---- idx : {idx} ----- ')
    #         masked_words = words.copy()
    #         print(f' ---- words : {masked_words} ----- ')
    #         # masked_words[idx] = self.tokenizer.mask_token
    #         masked_words[idx] = '[MASK]'
    #         print(f' ---- masked_words : {masked_words} ----- ')
    #         masked_sentence = " ".join(masked_words)
    #         print(f' ---- masked_sentence : {masked_sentence} ----- ')

    #         # Calculate logits for the current mask
    #         input_ids = self.tokenizer(masked_sentence, return_tensors="pt")["input_ids"]
    #         mask_token_index = torch.where(input_ids == self.tokenizer.mask_token_id)[1]

    #         with torch.no_grad():
    #             outputs = self.model(input_ids)
    #             logits = outputs.logits

    #         # Extract logits for the masked position
    #         mask_logits_tensor = logits[0, mask_token_index, :] 

    #         # Get top 50 logits and corresponding tokens
    #         top_mask_logits, top_mask_indices = torch.topk(mask_logits_tensor, 50, dim=-1)

    #         # Convert token IDs to words
    #         top_tokens = [self.tokenizer.convert_ids_to_tokens(token_id.item()) for token_id in top_mask_indices[0]]
    #         print(f' ---- top_tokens : {top_tokens} ----- ')

    #         # Store results
    #         mask_logits[idx] = {
    #             "tokens": top_tokens,
    #             "logits": top_mask_logits.tolist()
    #         }

    #     return mask_logits


    def process_sentences(self, sentences, result_dict, method="random"):
        """
        Process sentences and calculate logits for masked tokens.
        """
        results = {}

        for sentence, ngrams in result_dict.items():
            # Split punctuation from the last word before processing
            words = sentence.split()
            last_word = words[-1]
            if any(last_word.endswith(p) for p in ['.', ',', '!', '?', ';', ':']):
                # Split the last word and punctuation
                words[-1] = last_word[:-1]
                punctuation = last_word[-1]
                # Rejoin with space before punctuation to treat it as separate token
                processed_sentence = " ".join(words) + " " + punctuation
            else:
                processed_sentence = sentence
            
            if method == "random":
                masked_sentence, original_mask_indices = self.mask_sentence_random(processed_sentence, ngrams)
            elif method == "pseudorandom":
                masked_sentence, original_mask_indices = self.mask_sentence_pseudorandom(processed_sentence, ngrams)
            else:  # entropy
                masked_sentence, original_mask_indices = self.mask_sentence_entropy(processed_sentence, ngrams)
                
            logits = self.calculate_mask_logits(processed_sentence, original_mask_indices)
            results[sentence] = {
                "masked_sentence": masked_sentence,
                "mask_logits": logits
            }

        return results



if __name__ == "__main__":
    # Demo run on a single sentence whose punctuation is already a separate
    # token; the n-gram positions are word indices into that sentence.
    demo_sentence = "The quick brown fox jumps over small cat the lazy dog everyday again and again ."
    demo_sentences = [demo_sentence]
    demo_ngrams = {
        demo_sentence: {
            'brown fox': [(2, 3)],
            'cat': [(7, 7)],
            'dog': [(10, 10)],
        },
    }

    processor = MaskingProcessor()
    # Uses the random strategy ("pseudorandom" and entropy are the alternatives).
    demo_results = processor.process_sentences(demo_sentences, demo_ngrams, method="random")

    '''
        results structure : 
        results = {
                    "The quick brown fox jumps over the lazy dog everyday.": 
                    {  # Original sentence as key
                        "masked_sentence": str,  # The sentence with [MASK] tokens
                        "mask_logits": 
                        {  # Dictionary of mask positions and their predictions
                            1: 
                                {  # Position of mask in sentence
                                    "tokens" (words) : list,  # List of top 50 predicted tokens
                                    "logits" (probabilities) : list   # Corresponding logits for those tokens
                                },
                            7: 
                                {
                                    "tokens" (words) : list,
                                    "logits" (probabilities) : list
                                },
                            10: 
                                {
                                    "tokens (words)": list,
                                    "logits (probabilities)": list
                                }
                        }
                    }
                }

    '''

    # Show each original sentence next to its masked counterpart.
    for sentence, output in demo_results.items():
        print(f"Original Sentence (Random): {sentence}")
        print(f"Masked Sentence (Random): {output['masked_sentence']}")


# ------------------------------------------------------------------------------------------------
    # def mask_sentence_random(self, sentence, common_ngrams):
    #     """
    #     Mask words in the sentence based on the specified rules after removing stopwords.
    #     """
    #     original_words = sentence.split()
    #     # print(f' ---- original_words : {original_words} ----- ')
    #     non_stop_words = self.remove_stopwords(original_words)
    #     # print(f' ---- non_stop_words : {non_stop_words} ----- ')
    #     adjusted_ngrams = self.adjust_ngram_indices(original_words, common_ngrams)
    #     # print(f' ---- common_ngrams : {common_ngrams} ----- ')
    #     # print(f' ---- adjusted_ngrams : {adjusted_ngrams} ----- ')

    #     mask_indices = []

    #     # Extract n-gram positions in non-stop words
    #     ngram_positions = [pos for positions in adjusted_ngrams.values() for pos in positions]
        
    #     # Mask a word before the first common n-gram
    #     if ngram_positions:
    #         # print(f' ---- ngram_positions : {ngram_positions} ----- ')
    #         first_ngram_start = ngram_positions[0][0]
    #         # print(f' ---- first_ngram_start : {first_ngram_start} ----- ')
    #         if first_ngram_start > 0:
    #             mask_index_before_ngram = random.randint(0, first_ngram_start-1)
    #             # print(f' ---- mask_index_before_ngram : {mask_index_before_ngram} ----- ')
    #             mask_indices.append(mask_index_before_ngram)

    #         # Mask words between common n-grams
    #         for i in range(len(ngram_positions) - 1):
    #             end_prev = ngram_positions[i][1]
    #             # print(f' ---- end_prev : {end_prev} ----- ')
    #             start_next = ngram_positions[i + 1][0]
    #             # print(f' ---- start_next : {start_next} ----- ')
    #             if start_next > end_prev + 1:
    #                 mask_index_between_ngrams = random.randint(end_prev + 1, start_next - 1)
    #                 # print(f' ---- mask_index_between_ngrams : {mask_index_between_ngrams} ----- ')
    #                 mask_indices.append(mask_index_between_ngrams)

    #         # Mask a word after the last common n-gram
    #         last_ngram_end = ngram_positions[-1][1]
    #         if last_ngram_end < len(non_stop_words) - 1:
    #             # print(f' ---- last_ngram_end : {last_ngram_end} ----- ')
    #             mask_index_after_ngram = random.randint(last_ngram_end + 1, len(non_stop_words) - 1)
    #             # print(f' ---- mask_index_after_ngram : {mask_index_after_ngram} ----- ')
    #             mask_indices.append(mask_index_after_ngram)

    #     # Create mapping from non-stop words to original indices
    #     non_stop_to_original = {}
    #     non_stop_idx = 0
    #     for orig_idx, word in enumerate(original_words):
    #         if word.lower() not in self.stop_words:
    #             non_stop_to_original[non_stop_idx] = orig_idx
    #             non_stop_idx += 1

    #     # Map mask indices from non-stop word positions to original positions
    #     # print(f' ---- non_stop_to_original : {non_stop_to_original} ----- ')
    #     original_mask_indices = [non_stop_to_original[idx] for idx in mask_indices]
    #     # print(f' ---- original_mask_indices : {original_mask_indices} ----- ')

    #     # Apply masks to the original sentence
    #     masked_words = original_words.copy()
    #     for idx in original_mask_indices:
    #         masked_words[idx] = self.tokenizer.mask_token

    #     return " ".join(masked_words), original_mask_indices