# LSB.py (9 KB)
"""
<p>This plugin implements two variants of a well-known steganographic
procedure commonly referred to as LSB algorithm.</p>
The general idea is to
overwrite some portion (usually the k least significant bits) of each byte
in the cover image. The methods below specify the number of overwritten
bits via a parameter named word_size. Thus --- in this context --- word means
\"a group of bits of a fixed size\".
"""
import time
import numpy as np
import scipy as sp
from stegotool.util.lookup import Lookup
from stegotool.util.plugins import StegoBase
from stegotool.util.plugins import describe_annotate_convert
from stegotool.util.plugins import ident, ImagePath, FilePath, NewFilePath
import Image


class LSB(StegoBase):
    """ This module has 2 methods:
    <ul>
    <li><i>embed_raw_data</i> and <i>extract_raw_data</i> to embed/extract
    any data.</li>
    </ul>
    """

    def __init__(self, ui, core):
        """
        Constructor of the LSB class.

        Forwards ui and core to the StegoBase constructor and creates the
        lookup table used to convert between byte and word sequences.
        """
        super(LSB, self).__init__(ui, core)
        self.lookup_tab = Lookup(True)

    @describe_annotate_convert((None, None, ident),
                               ("cover image", ImagePath, str),
                               ("hidden data", FilePath, str),
                               ("stego image", NewFilePath, str),
                               ("word size", [1, 2, 4, 8], int))
    def embed_raw_data(self, src_cover, src_hidden, tgt_stego, word_size):
        """ <p>This method embeds arbitrary data into a cover image.
        Note that the cover image is of course decoded
        before embedding, the secret data however is not.</p>

        <p>The secret data is prefixed with a 4 byte little-endian size
        header, encrypted with the current crypto instance, split into
        words of word_size bits and written into the least significant
        bits of consecutive bytes of the cover image, beginning with the
        first byte.</p>

        <p>Parameters:
        <ol>
        <li><pre>src_cover</pre>
        A valid pathname to an image file which serves as cover image
        (the image which the secret data is embedded into).</li>

        <li><pre>src_hidden</pre>
        A valid pathname to an arbitrary file that is supposed to be
        embedded into the cover image.</li>

        <li><pre>tgt_stego</pre>
        Target pathname of the resulting stego image. You should save to
        a PNG or another lossless format, because many LSBs don't survive
        lossy compression.</li>

        <li><pre>word_size</pre>
        Must be an even divisor of 8, i.e. one of 1, 2, 4, 8. Specifies how
        many least significant bits of each byte in the cover image are used
        for embedding the secret data. The larger this number the more easily
        successful steganalysis can be carried out.</li>
        </ol>
        An exception is raised if the cover is not an image or if the secret
        data is too large to be embedded into the cover image.</p>
        """
        t0 = time.time()
        self.ui.set_progress(1)
        word_size = int(word_size)
        # number of cover bytes needed to carry one byte of hidden data
        words_per_byte = 8 // word_size

        # the cover may live in the media manager or on disk
        cov_img = self.core.media_manager.get_file(src_cover)
        if cov_img and isinstance(cov_img, Image.Image):
            cov_data = sp.misc.fromimage(cov_img)
        elif cov_img and hasattr(cov_img, "data"):
            raise Exception("cover image must be an image")
        else:
            cov_data = sp.misc.fromimage(Image.open(src_cover))

        # work on a flat byte sequence; the shape is restored before saving
        orig_shape = cov_data.shape
        cov_data = cov_data.ravel()

        # the hidden data is read as raw bytes, it is never decoded
        hidden_data_suffix = self.core.media_manager.get_file(src_hidden)
        if hidden_data_suffix and hasattr(hidden_data_suffix, 'data'):
            hidden_data_suffix = np.fromstring(hidden_data_suffix.data,
                                               np.uint8)
        elif hidden_data_suffix and hasattr(hidden_data_suffix, 'tmp_file'):
            hidden_data_suffix = np.fromfile(hidden_data_suffix.tmp_file,
                                             np.uint8)
        else:
            hidden_data_suffix = np.fromfile(src_hidden, np.uint8)

        hid_data = np.append(np.zeros(4, np.uint8), hidden_data_suffix)

        # write out the size of the hidden data (4 bytes, lowest byte first)
        size_hd = np.size(hid_data) - 4
        for i in range(4):
            hid_data[i] = (size_hd >> (8 * i)) & 0xff

        # header and payload are encrypted together in a single call
        crypto = self.core.crypto_manager.get_instance()
        hid_data = crypto.encrypt(hid_data)
        self.ui.display_status('Encryption of data: %s' % crypto.info())

        if np.size(hid_data) * words_per_byte > np.size(cov_data):
            raise Exception("Cover image is too small to embed data. Try "
                            "increasing the word size or choosing a larger "
                            "cover image.")

        # converting hid_data to a sequence of words whose length is specified
        # by word_size
        hid_data = self.lookup_tab.to_word_sequence(hid_data, word_size)
        n_words = np.size(hid_data)
        self.ui.set_progress(2)
        embedded = cov_data[:n_words]
        self.ui.set_progress(5)
        # clear the word_size lowest bits of each cover byte, then put the
        # hidden word in their place
        keep_mask = (0xff >> word_size) << word_size
        embedded = (embedded & keep_mask) | hid_data
        self.ui.set_progress(95)
        cov_data[:n_words] = embedded
        self.ui.set_progress(98)
        cov_data = cov_data.reshape(orig_shape)
        cov_img = sp.misc.toimage(cov_data)

        # store the result in the media manager or write it to disk
        if self.core.media_manager.is_media_key(tgt_stego):
            self.core.media_manager.put_media(tgt_stego, cov_img)
        else:
            cov_img.save(tgt_stego)

        size_embedded = n_words // words_per_byte
        self.ui.set_progress(99)
        self._display_stats("embedded", np.size(cov_data),
                            size_embedded, time.time() - t0)

    @describe_annotate_convert((None, None, ident),
                               ("stego image", ImagePath, str),
                               ("hidden data", NewFilePath, str),
                               ("word size", [1, 2, 4, 8], int))
    def extract_raw_data(self, src_steg, tgt_hidden, word_size):
        """ <p>This method extracts secret data from a stego image. It is
        (obviously) the inverse operation of embed_raw_data.</p>

        <p>The 4 byte size header is read and decrypted first; it determines
        how many further bytes of the stego image are read, converted back
        to bytes and decrypted.</p>

        <p>Parameters:
        <ol>
        <li><pre>src_steg</pre>
        A valid pathname to an image file which serves as stego image.</li>

        <li><pre>tgt_hidden</pre>
        A pathname denoting where the extracted data should be saved to.</li>

        <li><pre>word_size</pre>
        Number of overwritten bits when the data was embedded.</li>
        </ol>
        An exception is raised if the stego image is not an image or if the
        supposed secret data (including its size header) is too large to fit
        in the stego image.</p>
        """
        t0 = time.time()
        self.ui.set_progress(1)
        word_size = int(word_size)
        # number of stego bytes that carry one byte of hidden data
        words_per_byte = 8 // word_size
        # selects the word_size lowest bits of a byte
        word_mask = 0xff >> (8 - word_size)

        steg_img = self.core.media_manager.get_file(src_steg)

        if steg_img and isinstance(steg_img, Image.Image):
            steg_data = sp.misc.fromimage(steg_img).ravel()
        elif steg_img and hasattr(steg_img, "data"):
            raise Exception("stego image must be an image, "
                            "not an arbitrary data file")
        else:
            steg_data = sp.misc.fromimage(Image.open(src_steg)).ravel()

        # recovering file size
        header_size = 4 * words_per_byte
        size_data = steg_data[:header_size] & word_mask

        size_data = self.lookup_tab.to_byte_sequence(size_data, word_size)
        crypto = self.core.crypto_manager.get_instance()
        size_data = crypto.decrypt(size_data)
        self.ui.display_status('Decryption of size data: %s' % crypto.info())

        # the header stores the lowest byte first; cast to int so the
        # arithmetic is not carried out on narrow array scalars
        size_hd = 0
        for i in range(4):
            size_hd += int(size_data[i]) << (8 * i)

        raw_size = size_hd * words_per_byte

        # the payload starts after the header, so both have to fit; a slice
        # running past the end would otherwise be truncated silently
        if raw_size + header_size > np.size(steg_data):
            raise Exception("Supposed secret data too large for stego image.")
        self.ui.set_progress(5)
        hid_data = steg_data[header_size:raw_size + header_size] & word_mask
        self.ui.set_progress(90)
        hid_data = self.lookup_tab.to_byte_sequence(hid_data, word_size)
        self.ui.set_progress(95)
        final_data = crypto.decrypt(hid_data)
        self.ui.display_status('Decryption of data: %s' % crypto.info())

        self._extract_to_image_pool(tgt_hidden, final_data)

        self.ui.set_progress(99)
        self._display_stats("extracted", np.size(steg_data),
                            np.size(hid_data) + np.size(size_data),
                            time.time() - t0)

    def __str__(self):
        """Return the display name of this plugin."""
        return 'LSB'


# this function might be a candidate for some kind of util module
def _parse_boolean(b):
    """Turns a boolean value into a string if it is not already a boolean.

    @param b: A boolean or a string.

    @return: A boolean value representing b. Every value of b is interpreted
             to be true except the boolean False and any string that satisfies
             a case-insensitive comparison with 'false'.
    """
    return b if isinstance(b, bool) else b.strip.lower() != 'false'