diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py index c1dfa1f..ed04903 100644 --- a/mdata/ILSVRC.py +++ b/mdata/ILSVRC.py @@ -33,7 +33,8 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) class DataILSVRC(DataDumperBase): - def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'): + def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train', + host='HPC-server'): DataDumperBase.__init__(self, base_dir, category) self.base_dir = base_dir @@ -45,6 +46,10 @@ class DataILSVRC(DataDumperBase): self.feat_dir = os.path.join(self.dst_dir, 'Feat') self.img_dir = os.path.join(self.dst_dir, 'Img') + self.host = host + self.master = 'spark://%s:7077' % self.host + self.appname = 'ImageILSVRC' + self.dict_data = {} self.table_name = self.base_dir.strip('/').split('/')[-1] + '-' + self.category @@ -316,7 +321,7 @@ class DataILSVRC(DataDumperBase): return self.table if self.connection is None: - c = happybase.Connection('HPC-server') + c = happybase.Connection(host=self.host) self.connection = c tables = self.connection.tables() @@ -345,7 +350,7 @@ class DataILSVRC(DataDumperBase): table_name = self.table_name if self.connection is None: - c = happybase.Connection('HPC-server') + c = happybase.Connection(host=self.host) self.connection = c tables = self.connection.tables() @@ -506,7 +511,7 @@ class DataILSVRC(DataDumperBase): elif mode == "spark": # cluster if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, master=self.master) result = self.sparker.read_hbase(self.table_name) # result = {key:[feat,tag],...} for feat, tag in result: diff --git a/mdata/ILSVRC_S.py b/mdata/ILSVRC_S.py index e98089a..1734990 100644 --- a/mdata/ILSVRC_S.py +++ b/mdata/ILSVRC_S.py @@ -39,7 +39,7 @@ class DataILSVRC_S(DataDumperBase): copyright(c) 2015 chunkplus@gmail.com """ - def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', tablename=None): + def __init__(self, base='ILSVRC2013_DET_val', category='Train_1', host='HPC-server', tablename=None): DataDumperBase.__init__(self, base, category) self.base = base @@ -55,8 +55,11 @@ class DataILSVRC_S(DataDumperBase): else: self.table_name = tablename - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.host = host + self.master = 'spark://%s:7077' % self.host + self.appname = 'ImageILSVRC-S' + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) self.steger = F5.F5(sample_key, 1) @@ -67,7 +70,7 @@ class DataILSVRC_S(DataDumperBase): return self.table if self.connection is None: - c = happybase.Connection('HPC-server') + c = happybase.Connection(host=self.host) self.connection = c tables = self.connection.tables() @@ -91,7 +94,7 @@ class DataILSVRC_S(DataDumperBase): table_name = self.table_name if self.connection is None: - c = happybase.Connection('HPC-server') + c = happybase.Connection(host=self.host) self.connection = c tables = self.connection.tables() @@ -251,8 +254,8 @@ class DataILSVRC_S(DataDumperBase): elif mode == 'spark': if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) cols = [ 'cf_pic:data', @@ -285,8 +288,8 @@ class DataILSVRC_S(DataDumperBase): withdata=withdata) elif mode == 'analysis': if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) cols = [ 'cf_pic:data', @@ -401,8 +404,8 @@ class DataILSVRC_S(DataDumperBase): elif mode == 'spark': if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) cols = [ 'cf_pic:data', @@ -496,8 +499,8 @@ class DataILSVRC_S(DataDumperBase): elif mode == 'spark': if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) cols = [ 'cf_pic:data', @@ -532,8 +535,8 @@ class DataILSVRC_S(DataDumperBase): def _analysis(self, mode='analysis', feattype='ibd', readforward=False, writeback=True, withdata=False): if mode == 'analysis': if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) cols = [ 'cf_pic:data', @@ -618,8 +621,8 @@ class DataILSVRC_S(DataDumperBase): elif mode == "spark" or mode == "cluster": if self.sparker == None: - self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', - master='spark://HPC-server:7077') + self.sparker = SC.Sparker(host=self.host, appname=self.appname, + master=self.master) rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False) if not collect: diff --git a/msteg/steganalysis/ChiSquare.py b/msteg/steganalysis/ChiSquare.py deleted file mode 100644 index 7d0bb2c..0000000 --- a/msteg/steganalysis/ChiSquare.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -

-This module implements an algorithm described by Andreas Westfeld in [1,2], -which detects if there was data embedded into an image using JSteg. -It uses the property that JSteg generates pairs of values in the -DCT-coefficients histogram, which can be detected by a \chi^2 test. -

- -
-[1]: Andreas Westfeld, F5 - A Steganographic Algorithm High Capacity Despite
-Better Steganalysis
-[2]: Andreas Westfeld, Angriffe auf steganographische Systeme
-
-""" - -from collections import defaultdict -import os - -from PIL import Image -import numpy -from scipy.stats import chisquare -import matplotlib.pyplot as plt -import itertools as it - -from .. import * - - -class ChiSquare(StegBase): - """ - The module contains only one method, detect. - """ - - def __init__(self, ui, core): - self.ui = ui - self.core = core - - def detect(self, src, tgt, tgt2): - """ -

- Detect if there was data embedded in the source image image with - JSteg algorithm. -

- -

- Parameters: -

    -
  1. Source image
    Image which should be tested
  2. -
  3. Target image
    Image which displays a graphic with the - embedding probability
  4. -
  5. 2nd Target image
    Image which displays the embedding - positions in the image
  6. -
-

- """ - # --------------------------- Input ----------------------------------- - # If src is from the image pool, test whether the image exists encoded - # on the file system. Otherwise we can not read DCT-coefficients. - if self.core.media_manager.is_media_key(src): - src = self.core.media_manager.get_file(src) - if hasattr(src, 'tmp_file'): - src = src.tmp_file - self.ui.display_error('Trying file: %s' % src) - else: - self.ui.display_error('Can not detect anything from \ - decoded images.') - return - # Test whether the file exists. - if not os.path.isfile(src): - self.ui.display_error('No such file.') - return - # Test if it is a JPEG file. - if not self._looks_like_jpeg(src): - self.ui.display_error('Input is probably not a JPEG file.') - return - - # ---------------------------- Algorithm ------------------------------ - # Build DCT-histogram in steps of \approx 1% of all coefficients and - # calculate the p-value at each step. - - # dct_data = rw_dct.read_dct_coefficients(src) - dct_data = self._get_cov_data(src) - - hist = defaultdict(int) - cnt = 0 - l = len(dct_data) - one_p = l / 100 - result = [] - for block in dct_data: - # update the histogram with one block of 64 coefficients - for c in block: - hist[c] += 1 - - cnt += 1 - if not cnt % one_p: - # calculate p-value - self.ui.set_progress(cnt * 100 / l) - - # ignore the pair (0, 1), since JSteg does not embed data there - hl = [hist[i] for i in range(-2048, 2049) if not i in (0, 1)] - k = len(hl) / 2 - observed = [] - expected = [] - # calculate observed and expected distribution - for i in range(k): - t = hl[2 * i] + hl[2 * i + 1] - if t > 3: - observed.append(hl[2 * i]) - expected.append(t / 2) - # calculate (\chi^2, p) - p = chisquare(numpy.array(observed), numpy.array(expected))[1] - result.append(p) - - # ----------------------------- Output -------------------------------- - # Graph displaying the embedding probabilities in relation to the - # sample size. - figure = plt.figure() - plot = figure.add_subplot(111) - plot.grid(True) - plot.plot(result, color='r', linewidth=2.0) - plt.axis([0, 100, 0, 1.1]) - plt.title('Embedding probability for different percentages \ -of the file capacity.') - plt.xlabel('% of file capacity') - plt.ylabel('Embedding probability') - - if self.core.media_manager.is_media_key(tgt): - img = figure_to_pil(figure) - self.core.media_manager.put_media(tgt, img) - else: - plt.savefig(tgt) - - # Image displaying the length and position of the embedded data - # within the image - img2 = Image.open(src) - img2.convert("RGB") - width, height = img2.size - - for i in range(100): - result[i] = max(result[i:]) - - cnt2 = 0 - for (top, left) in it.product(range(0, height, 8), range(0, width, 8)): - if not cnt2 % one_p: - r = result[cnt2 / one_p] - if r >= 0.5: - color = (255, int((1 - r) * 2 * 255), 0) - else: - color = (int(r * 2 * 255), 255, 0) - cnt2 += 1 - img2.paste(color, (left, top, min(left + 8, width), - min(top + 8, height))) - self.core.media_manager.put_media(tgt2, img2) - - def __str__(self): - return 'Chi-Square-Test' - - -def figure_to_pil(figure): - figure.canvas.draw() - return Image.fromstring('RGB', - figure.canvas.get_width_height(), - figure.canvas.tostring_rgb()) diff --git a/test/test_jpeg.py b/test/test_jpeg.py index e3aa247..fbbaf60 100644 --- a/test/test_jpeg.py +++ b/test/test_jpeg.py @@ -1,8 +1,6 @@ __author__ = 'chunk' import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns from .. import mjpeg from ..mjpeg import base from ..msteg.steganography import LSB, F3, F4, F5 @@ -24,9 +22,6 @@ sample_key = [46812L, 20559L, 31360L, 16681L, 27536L, 39553L, 5427L, 63029L, 565 61908L, 63014L, 5908L, 59816L, 56765L] -# plt.ticklabel_format(style='sci', axis='both', scilimits=(1, 4)) - -plt.ticklabel_format(style='sci', axis='both') package_dir = os.path.dirname(os.path.abspath(__file__)) @@ -182,38 +177,6 @@ def test_jpeg(): print mjpeg.diffblocks(ima, imc) -def test_hist(): - ima = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), key=sample_key) - print ima.getQuality() - print ima.getCapacity('All') - - capacity = ima.getCapacity() - print capacity - rate = 0.65 - hidden = np.random.bytes(int(int(capacity) * rate) / 8) - steger = F5.F5(sample_key, 1) - steger2 = F4.F4(key=None) - steger3 = LSB.LSB(key=None) - embed_rate = steger3.embed_raw_data(os.path.join(package_dir, "../res/high/pic3_orig.jpg"), - hidden, - os.path.join(package_dir, "../res/high/pic3_dest.jpg"), - frommem=True) - # - print embed_rate - - imb = mjpeg.Jpeg(os.path.join(package_dir, "../res/high/pic3_dest.jpg"), key=sample_key) - print imb.getQuality() - print imb.getCapacity('All') - - A = imb.rawsignal().tolist() - E = [i for i in range(-8, 10)] - plt.hist(A, E, histtype='bar', rwidth=0.8, align='left') - plt.xlabel("JPEG coefficients after quantisation") - plt.ylabel("Frequency") - plt.xticks([i for i in range(-8, 9)]) - plt.ylim(ymax=300000) - plt.show() - if __name__ == '__main__': # timer.mark() -- libgit2 0.21.2