diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py
index ff4fb4d..c1dfa1f 100644
--- a/mdata/ILSVRC.py
+++ b/mdata/ILSVRC.py
@@ -1,8 +1,8 @@
 __author__ = 'chunk'

 from . import *
-from ..mfeat import HOG, IntraBlockDiff
-from ..mspark import SC
+from ..mfeat import IntraBlockDiff
+from ..mspark import rdd, SC
 from ..common import *

 import os, sys
@@ -83,11 +83,11 @@ class DataILSVRC(DataDumperBase):
         pass

     def get_feat(self, image, feattype='ibd', **kwargs):
-        size = kwargs.get('size', (48, 48))
-
-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=size)
-        elif feattype == 'ibd':
+        # size = kwargs.get('size', (48, 48))
+        #
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=size)
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -99,9 +99,9 @@ class DataILSVRC(DataDumperBase):
     def extract_feat(self, feattype='ibd'):
         print "extracting feat..."

-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=(48, 48))
-        elif feattype == 'ibd':
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=(48, 48))
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -307,7 +307,7 @@ class DataILSVRC(DataDumperBase):
             #         cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
             # except Exception as e:
             #     print '[EXCPT]', e
-            #     pass
+            # pass

     def get_table(self):
@@ -322,10 +322,10 @@ class DataILSVRC(DataDumperBase):
         tables = self.connection.tables()
         if self.table_name not in tables:
             families_compressed = {'cf_pic': dict(compression='LZO'),
-                        'cf_info': dict(max_versions=10,compression='LZO'),
-                        'cf_tag': dict(compression='LZO'),
-                        'cf_feat': dict(compression='LZO'),
-                        }
+                                   'cf_info': dict(max_versions=10, compression='LZO'),
+                                   'cf_tag': dict(compression='LZO'),
+                                   'cf_feat': dict(compression='LZO'),
+                                   }
             families = {'cf_pic': dict(),
                         'cf_info': dict(max_versions=10),
                         'cf_tag': dict(),
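
NOTE: the ILSVRC.py hunks above drop HOG support without deleting it outright:
the 'hog' branches are commented out, so 'ibd' (intra-block difference) is the
only feature type left and anything else falls through to the raise. A minimal
sketch of the resulting call-site behavior ('dumper' and 'image' are
hypothetical stand-ins, not names from this patch):

    desc = dumper.get_feat(image, feattype='ibd')    # still supported
    try:
        dumper.get_feat(image, feattype='hog')       # branch commented out above
    except Exception as e:
        print e                                      # "Unknown feature type!"
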
diff --git a/mdata/ILSVRC_S.py b/mdata/ILSVRC_S.py
index 4d87a20..e98089a 100644
--- a/mdata/ILSVRC_S.py
+++ b/mdata/ILSVRC_S.py
@@ -1,8 +1,8 @@
 __author__ = 'chunk'

 from . import *
-from ..mfeat import HOG, IntraBlockDiff
-from ..mspark import SC
+from ..mfeat import IntraBlockDiff
+from ..mspark import rdd, SC
 from pyspark.mllib.regression import LabeledPoint
 from ..common import *

@@ -135,11 +135,11 @@ class DataILSVRC_S(DataDumperBase):
             tmpf.close()

     def _get_feat(self, image, feattype='ibd', **kwargs):
-        size = kwargs.get('size', (48, 48))
-
-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=size)
-        elif feattype == 'ibd':
+        # size = kwargs.get('size', (48, 48))
+        #
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=size)
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -267,16 +267,16 @@ class DataILSVRC_S(DataDumperBase):
         ]

         # # Debug
-        # tmp_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        # tmp_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
         #                                    collect=False)
-        # # tmp_data = tmp_data.mapValues(lambda data: [data] + SC.rddinfo_ILS(data))
+        # # tmp_data = tmp_data.mapValues(lambda data: [data] + rdd.rddinfo_ILS(data))
         # print tmp_data.collect()[0][1]
         # return

-        self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
                                                 collect=False).mapValues(
-            lambda data: [data] + SC.rddinfo_ILS(data))
+            lambda data: [data] + rdd.rddinfo_ILS(data))

         if not writeback:
             return self.rdd_data
@@ -293,14 +293,14 @@ class DataILSVRC_S(DataDumperBase):
         ]

         # # Debug
-        # tmp_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        # tmp_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
         #                                    collect=False)
-        # # tmp_data = tmp_data.mapValues(lambda data: [data] + SC.rddinfo_ILS(data))
+        # # tmp_data = tmp_data.mapValues(lambda data: [data] + rdd.rddinfo_ILS(data))
         # print tmp_data.collect()[0][1]
         # return

-        self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
                                                 collect=False).mapValues(
             lambda data: [data])
@@ -417,12 +417,12 @@ class DataILSVRC_S(DataDumperBase):
         ]

         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)

-        # rdd_data_ext = self.rdd_data.map(lambda x: SC.rddembed_ILS(x, rate=rate)).filter(lambda x: x != None)
+        # rdd_data_ext = self.rdd_data.map(lambda x: rdd.rddembed_ILS(x, rate=rate)).filter(lambda x: x != None)
         # self.rdd_data = self.rdd_data.union(rdd_data_ext)
-        self.rdd_data = self.rdd_data.flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=rate))
+        self.rdd_data = self.rdd_data.flatMap(lambda x: rdd.rddembed_ILS_EXT(x, rate=rate))

         if not writeback:
             return self.rdd_data
         else:
@@ -513,9 +513,9 @@ class DataILSVRC_S(DataDumperBase):
         ]

         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)

-        self.rdd_data = self.rdd_data.mapValues(lambda items: SC.rddfeat_ILS(items, feattype))
+        self.rdd_data = self.rdd_data.mapValues(lambda items: rdd.rddfeat_ILS(items, feattype))

         # print self.rdd_data.collect()[0]
         # return
@@ -541,9 +541,9 @@ class DataILSVRC_S(DataDumperBase):
         ]

         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)

-        self.rdd_data = self.rdd_data.mapValues(lambda items: SC.rddanalysis_ILS(items))
+        self.rdd_data = self.rdd_data.mapValues(lambda items: rdd.rddanalysis_ILS(items))

         # print self.rdd_data.collect()[0]
         # return
@@ -621,7 +621,7 @@ class DataILSVRC_S(DataDumperBase):
             self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
                                       master='spark://HPC-server:7077')

-        rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False)
+        rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False)
         if not collect:
             rdd_dataset = rdd_dataset.map(lambda x: LabeledPoint(x[0], x[1]))
         return rdd_dataset
diff --git a/mmodel/svm/SVM.py b/mmodel/svm/SVM.py
index a00cf8b..e39f351 100644
--- a/mmodel/svm/SVM.py
+++ b/mmodel/svm/SVM.py
@@ -9,7 +9,7 @@ import os, sys
 from ...mfeat import *
 from ...mmodel import *
 from ...mmodel.svm.svmutil import *
-from ...mspark import SC2
+from ...mspark import SC
 from ...common import *

 import numpy as np
@@ -191,7 +191,7 @@ class ModelSVM(ModelBase):

     def _train_spark(self, X, Y=None):
         if self.sparker == None:
-            self.sparker = SC2.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
+            self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')

         self.model = self.sparker.train_svm(X, Y)
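
NOTE: the SVM.py hunks swap the Spark backend from the SC2 module to the
consolidated SC module; the Sparker constructor and train_svm call are otherwise
unchanged. A sketch of the path _train_spark now takes, assuming SC.Sparker keeps
the interface this call site uses (absolute import shown for brevity; the real
module uses the relative form `from ...mspark import SC`):

    from mspark import SC

    sparker = SC.Sparker(host='HPC-server', appname='ImageCV',
                         master='spark://HPC-server:7077')
    model = sparker.train_svm(X, Y)   # X: feature vectors, Y: labels
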
- """ - key = raw_row[0] - # if key == '04650c488a2b163ca8a1f52da6022f03.jpg': - # with open('/tmp/hhhh','wb') as f: - # f.write(raw_row[1].decode('unicode-escape')).encode('latin-1') - items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') - data = items[0].split('cf_pic:data:')[-1] - return (key, data) - - -def rddparse_all_ILS(raw_row): - """ - Deprecated - """ - key = raw_row[0] - items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') - - # @TODO - # N.B "ValueError: No JSON object could be decoded" Because the spark-hbase IO is based on strings. - # And the order of items is not as expected. See ../res/row-sample.txt or check in hbase shell for that. - - data = [items[0].split('cf_pic:data:')[-1]] + [json.loads(item.split(':')[-1]) for item in - items[1:]] - - return (key, data) - - -def rddparse_dataset_ILS(raw_row): - if raw_row[0] == '04650c488a2b163ca8a1f52da6022f03.jpg': - print raw_row - items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') - # tag = int(items[-2].split('cf_tag:' + tagtype)[-1]) - # feat = [item for sublist in json.loads(items[-1].split('cf_feat:' + feattype)[-1]) for subsublist in sublist for item in subsublist] - tag = int(items[-1].split(':')[-1]) - feat = [item for sublist in json.loads(items[0].split(':')[-1]) for subsublist in sublist for - item in subsublist] - - return (tag, feat) - - -def rddinfo_ILS(img, info_rate=None, tag_chosen=None, tag_class=None): - """ - Tempfile is our friend. (?) - """ - info_rate = info_rate if info_rate != None else 0.0 - tag_chosen = tag_chosen if tag_chosen != None else stats.bernoulli.rvs(0.8) - tag_class = tag_class if tag_class != None else 0 - try: - tmpf = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b', delete=True) - tmpf.write(img) - tmpf.seek(0) - im = Jpeg(tmpf.name, key=sample_key) - info = [ - im.image_width, - im.image_height, - im.image_width * im.image_height, - im.getCapacity(), - im.getQuality(), - info_rate, - tag_chosen, - tag_class - ] - return info - except Exception as e: - print e - raise - finally: - tmpf.close() - - -def rddembed_ILS(row, rate=None): - """ - input: - e.g. row =('row1',[1,3400,'hello']) - return: - newrow = ('row2',[34,5400,'embeded']) - """ - items = row[1] - capacity, chosen = int(items[4]), int(items[7]) - if chosen == 0: - return None - try: - tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - tmpf_src.write(items[0]) - tmpf_src.seek(0) - tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - - steger = F5.F5(sample_key, 1) - - if rate == None: - embed_rate = steger.embed_raw_data(tmpf_src.name, - os.path.join(package_dir, '../res/toembed'), - tmpf_dst.name) - else: - assert (rate >= 0 and rate < 1) - # print capacity - hidden = np.random.bytes(int(int(capacity) * rate) / 8) - embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True) - - tmpf_dst.seek(0) - raw = tmpf_dst.read() - index = md5(raw).hexdigest() - - return (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1)) - - except Exception as e: - print e - raise - finally: - tmpf_src.close() - tmpf_dst.close() - - -def rddembed_ILS_EXT(row, rate=None): - """ - input: - e.g. 
row =('row1',[1,3400,'hello']) - return: - newrow = ('row2',[34,5400,'embeded']) or NULL - [row,newrow] - """ - items = row[1] - capacity, chosen = int(items[4]), int(items[7]) - if chosen == 0: - return [row] - try: - tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - tmpf_src.write(items[0]) - tmpf_src.seek(0) - tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - - steger = F5.F5(sample_key, 2) - - if rate == None: - embed_rate = steger.embed_raw_data(tmpf_src.name, - os.path.join(package_dir, '../res/toembed'), - tmpf_dst.name) - else: - assert (rate >= 0 and rate < 1) - # print capacity - hidden = np.random.bytes(int(int(capacity) * rate) / 8) - embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True) - - tmpf_dst.seek(0) - raw = tmpf_dst.read() - index = md5(raw).hexdigest() - - return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))] - - except Exception as e: - print e - raise - finally: - tmpf_src.close() - tmpf_dst.close() - - -def _get_feat(image, feattype='ibd', **kwargs): - if feattype == 'ibd': - feater = IntraBlockDiff.FeatIntraBlockDiff() - else: - raise Exception("Unknown feature type!") - - desc = feater.feat(image) - - return desc - - -def rddfeat_ILS(items, feattype='ibd', **kwargs): - try: - tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - tmpf_src.write(items[0]) - tmpf_src.seek(0) - - desc = json.dumps(_get_feat(tmpf_src.name, feattype=feattype).tolist()) - # print 'desccccccccccccccccccc',desc - return items + [desc] - - except Exception as e: - print e - raise - finally: - tmpf_src.close() - - -def rddanalysis_ILS(items, feattype='ibd', **kwargs): - head = np.fromstring(items[0][:2], dtype=np.uint8) - if not np.array_equal(head, [255, 216]): - return items + [0] - try: - tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') - tmpf_src.write(items[0]) - tmpf_src.seek(0) - - desc = _get_feat(tmpf_src.name, feattype=feattype) - tag = classifier.predict(desc.ravel())[0] - # print 'desccccccccccccccccccc',desc - return items + [tag] - - except Exception as e: - print e - raise - finally: - tmpf_src.close() - - # return items + classifier.predict(items[-1]) - - -def format_out(row, cols, withdata=False): - """ - input: - e.g. 
row =('row1',[1,3400,'hello']) - cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']] - return: - [('row1',['row1', 'cf_info', 'id', '1']),('row1',['row1', 'cf_info', 'size', '3400']),('row1',['row1', 'cf_tag', 'desc', 'hello'])] - """ - puts = [] - key = row[0] - # if key == '04650c488a2b163ca8a1f52da6022f03.jpg': - # print row - if not withdata: - for data, col in zip(row[1][1:], cols[1:]): - puts.append((key, [key] + col + [str(data)])) - else: - for data, col in zip(row[1], cols): - puts.append((key, [key] + col + [str(data)])) - return puts - - -# scconf = SparkConf() -# scconf.setSparkHome("HPC-server") \ -# .setMaster("spark://HPC-server:7077") \ -# .setAppName("example") -# sc = SparkContext(conf=scconf) -# -# -# def read_hbase(table_name, func=None, collect=False): -# """ -# ref - http://happybase.readthedocs.org/en/latest/user.html#retrieving-data -# -# Filter format: -# columns=['cf1:col1', 'cf1:col2'] -# or -# columns=['cf1'] -# -# """ -# -# hconf = { -# "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", -# # "hbase.zookeeper.quorum": self.host, -# "hbase.mapreduce.inputtable": table_name, -# } -# -# hbase_rdd = sc.newAPIHadoopRDD(inputFormatClass=hparams["inputFormatClass"], -# keyClass=hparams["readKeyClass"], -# valueClass=hparams["readValueClass"], -# keyConverter=hparams["readKeyConverter"], -# valueConverter=hparams["readValueConverter"], -# conf=hconf) -# -# parser = func if func != None else rddparse_data_CV -# hbase_rdd = hbase_rdd.map(lambda x: parser(x)) -# -# if collect: -# return hbase_rdd.collect() -# else: -# return hbase_rdd -# -# -# def write_hbase(table_name, data, fromrdd=False, columns=None, withdata=False): -# """ -# Data Format: (Deprecated) -# e.g. [["row8", "f1", "", "caocao cao"], ["row9", "f1", "c1", "asdfg hhhh"]] -# -# Data(from dictionary): -# e.g. data ={'row1':[1,3400,'hello'], 'row2':[34,5000,'here in mine']}, -# cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc'] -# Data(from Rdd): -# e.g. 
data =[('row1',[1,3400,'hello']), ('row2',[34,5000,'here in mine'])], -# cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc'] -# """ -# hconf = { -# "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", # "hbase.zookeeper.quorum": self.host, -# "hbase.mapreduce.inputtable": table_name, -# "hbase.mapred.outputtable": table_name, -# "mapreduce.outputformat.class": hparams["outputFormatClass"], -# "mapreduce.job.output.key.class": hparams["writeKeyClass"], -# "mapreduce.job.output.value.class": hparams["writeValueClass"], -# } -# cols = [col.split(':') for col in columns] -# if not fromrdd: -# rdd_data = sc.parallelize(data) -# else: -# rdd_data = data -# -# rdd_data.flatMap(lambda x: format_out(x, cols, withdata=withdata)).saveAsNewAPIHadoopDataset( -# conf=hconf, -# keyConverter=hparams["writeKeyConverter"], -# valueConverter=hparams["writeValueConverter"]) class Sparker(object): diff --git a/mspark/rdd.py b/mspark/rdd.py new file mode 100644 index 0000000..2f982d4 --- /dev/null +++ b/mspark/rdd.py @@ -0,0 +1,267 @@ +__author__ = 'hadoop' + +from ..common import * + +from ..mjpeg import * +from ..msteg import * +from ..msteg.steganography import LSB, F3, F4, F5 +from ..mfeat import IntraBlockDiff +from ..mmodel.svm import SVM + +from numpy import array +import json +import pickle +import tempfile + +import numpy as np +from scipy import stats +from hashlib import md5 + +np.random.seed(sum(map(ord, "whoami"))) +package_dir = os.path.dirname(os.path.abspath(__file__)) +classifier = SVM.ModelSVM(toolset='sklearn') + +def rddparse_data_CV(raw_row): + """ + input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True') + return: ([0.056273,...],1) + """ + data = raw_row[1].split('--%--') + feat = json.loads(data[0].split(':')[-1]) + tag = 1 if data[-1].split(':')[-1] == 'True' else 0 + return (feat, tag) + + +def rddparse_data_ILS(raw_row): + """ + input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True') + return: ([0.056273,...],1) + + In fact we can also use mapValues. + """ + key = raw_row[0] + # if key == '04650c488a2b163ca8a1f52da6022f03.jpg': + # with open('/tmp/hhhh','wb') as f: + # f.write(raw_row[1].decode('unicode-escape')).encode('latin-1') + items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') + data = items[0].split('cf_pic:data:')[-1] + return (key, data) + + +def rddparse_all_ILS(raw_row): + """ + Deprecated + """ + key = raw_row[0] + items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') + + # @TODO + # N.B "ValueError: No JSON object could be decoded" Because the spark-hbase IO is based on strings. + # And the order of items is not as expected. See ../res/row-sample.txt or check in hbase shell for that. 
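
NOTE: with the helpers stripped out of SC.py, call sites move from the SC
namespace to the new rdd module introduced below (SC keeps re-exporting them via
`from .rdd import *`, so star-importers are unaffected). Before/after at a
typical read path, mirroring the ILSVRC_S.py hunks above ('ILSVRC_S' is an
illustrative stand-in for self.table_name):

    from mspark import rdd, SC

    sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
                         master='spark://HPC-server:7077')
    # before this patch: func=SC.rddparse_data_ILS
    rdd_data = sparker.read_hbase('ILSVRC_S', func=rdd.rddparse_data_ILS,
                                  collect=False).mapValues(
        lambda data: [data] + rdd.rddinfo_ILS(data))
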
diff --git a/mspark/rdd.py b/mspark/rdd.py
new file mode 100644
index 0000000..2f982d4
--- /dev/null
+++ b/mspark/rdd.py
@@ -0,0 +1,267 @@
+__author__ = 'hadoop'
+
+from ..common import *
+
+from ..mjpeg import *
+from ..msteg import *
+from ..msteg.steganography import LSB, F3, F4, F5
+from ..mfeat import IntraBlockDiff
+from ..mmodel.svm import SVM
+
+from numpy import array
+import json
+import pickle
+import tempfile
+
+import numpy as np
+from scipy import stats
+from hashlib import md5
+
+np.random.seed(sum(map(ord, "whoami")))
+package_dir = os.path.dirname(os.path.abspath(__file__))
+classifier = SVM.ModelSVM(toolset='sklearn')
+
+def rddparse_data_CV(raw_row):
+    """
+    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
+    return: ([0.056273,...],1)
+    """
+    data = raw_row[1].split('--%--')
+    feat = json.loads(data[0].split(':')[-1])
+    tag = 1 if data[-1].split(':')[-1] == 'True' else 0
+    return (feat, tag)
+
+
+def rddparse_data_ILS(raw_row):
+    """
+    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
+    return: ([0.056273,...],1)
+
+    In fact we can also use mapValues.
+    """
+    key = raw_row[0]
+    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
+    #     with open('/tmp/hhhh','wb') as f:
+    #         f.write(raw_row[1].decode('unicode-escape')).encode('latin-1')
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+    data = items[0].split('cf_pic:data:')[-1]
+    return (key, data)
+
+
+def rddparse_all_ILS(raw_row):
+    """
+    Deprecated
+    """
+    key = raw_row[0]
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+
+    # @TODO
+    # N.B "ValueError: No JSON object could be decoded" Because the spark-hbase IO is based on strings.
+    # And the order of items is not as expected. See ../res/row-sample.txt or check in hbase shell for that.
+
+    data = [items[0].split('cf_pic:data:')[-1]] + [json.loads(item.split(':')[-1]) for item in
+                                                   items[1:]]
+
+    return (key, data)
+
+
+def rddparse_dataset_ILS(raw_row):
+    if raw_row[0] == '04650c488a2b163ca8a1f52da6022f03.jpg':
+        print raw_row
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+    # tag = int(items[-2].split('cf_tag:' + tagtype)[-1])
+    # feat = [item for sublist in json.loads(items[-1].split('cf_feat:' + feattype)[-1]) for subsublist in sublist for item in subsublist]
+    tag = int(items[-1].split(':')[-1])
+    feat = [item for sublist in json.loads(items[0].split(':')[-1]) for subsublist in sublist for
+            item in subsublist]
+
+    return (tag, feat)
+
+
+def rddinfo_ILS(img, info_rate=None, tag_chosen=None, tag_class=None):
+    """
+    Tempfile is our friend. (?)
+    """
+    info_rate = info_rate if info_rate != None else 0.0
+    tag_chosen = tag_chosen if tag_chosen != None else stats.bernoulli.rvs(0.8)
+    tag_class = tag_class if tag_class != None else 0
+    try:
+        tmpf = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b', delete=True)
+        tmpf.write(img)
+        tmpf.seek(0)
+        im = Jpeg(tmpf.name, key=sample_key)
+        info = [
+            im.image_width,
+            im.image_height,
+            im.image_width * im.image_height,
+            im.getCapacity(),
+            im.getQuality(),
+            info_rate,
+            tag_chosen,
+            tag_class
+        ]
+        return info
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf.close()
+
+
+def rddembed_ILS(row, rate=None):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+    return:
+        newrow = ('row2',[34,5400,'embeded'])
+    """
+    items = row[1]
+    capacity, chosen = int(items[4]), int(items[7])
+    if chosen == 0:
+        return None
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+
+        steger = F5.F5(sample_key, 1)
+
+        if rate == None:
+            embed_rate = steger.embed_raw_data(tmpf_src.name,
+                                               os.path.join(package_dir, '../res/toembed'),
+                                               tmpf_dst.name)
+        else:
+            assert (rate >= 0 and rate < 1)
+            # print capacity
+            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
+            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
+
+        tmpf_dst.seek(0)
+        raw = tmpf_dst.read()
+        index = md5(raw).hexdigest()
+
+        return (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+        tmpf_dst.close()
+
+
+def rddembed_ILS_EXT(row, rate=None):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+    return:
+        newrow = ('row2',[34,5400,'embeded']) or NULL
+        [row,newrow]
+    """
+    items = row[1]
+    capacity, chosen = int(items[4]), int(items[7])
+    if chosen == 0:
+        return [row]
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+
+        steger = F5.F5(sample_key, 2)
+
+        if rate == None:
+            embed_rate = steger.embed_raw_data(tmpf_src.name,
+                                               os.path.join(package_dir, '../res/toembed'),
+                                               tmpf_dst.name)
+        else:
+            assert (rate >= 0 and rate < 1)
+            # print capacity
+            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
+            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
+
+        tmpf_dst.seek(0)
+        raw = tmpf_dst.read()
+        index = md5(raw).hexdigest()
+
+        return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+        tmpf_dst.close()
+
+
+def _get_feat(image, feattype='ibd', **kwargs):
+    if feattype == 'ibd':
+        feater = IntraBlockDiff.FeatIntraBlockDiff()
+    else:
+        raise Exception("Unknown feature type!")
+
+    desc = feater.feat(image)
+
+    return desc
+
+
+def rddfeat_ILS(items, feattype='ibd', **kwargs):
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+
+        desc = json.dumps(_get_feat(tmpf_src.name, feattype=feattype).tolist())
+        # print 'desccccccccccccccccccc',desc
+        return items + [desc]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+
+
+def rddanalysis_ILS(items, feattype='ibd', **kwargs):
+    head = np.fromstring(items[0][:2], dtype=np.uint8)
+    if not np.array_equal(head, [255, 216]):
+        return items + [0]
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+
+        desc = _get_feat(tmpf_src.name, feattype=feattype)
+        tag = classifier.predict(desc.ravel())[0]
+        # print 'desccccccccccccccccccc',desc
+        return items + [tag]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+
+    # return items + classifier.predict(items[-1])
+
+
+def format_out(row, cols, withdata=False):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+             cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']]
+    return:
+        [('row1',['row1', 'cf_info', 'id', '1']),('row1',['row1', 'cf_info', 'size', '3400']),('row1',['row1', 'cf_tag', 'desc', 'hello'])]
+    """
+    puts = []
+    key = row[0]
+    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
+    #     print row
+    if not withdata:
+        for data, col in zip(row[1][1:], cols[1:]):
+            puts.append((key, [key] + col + [str(data)]))
+    else:
+        for data, col in zip(row[1], cols):
+            puts.append((key, [key] + col + [str(data)]))
+    return puts
-- 
libgit2 0.21.2
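
NOTE: format_out, moved verbatim into rdd.py above, fans one (rowkey, values)
pair out into per-cell HBase puts; with withdata=False the first value (the raw
image bytes) is skipped. A worked example taken directly from its docstring:

    row = ('row1', [1, 3400, 'hello'])
    cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']]
    format_out(row, cols, withdata=True)
    # -> [('row1', ['row1', 'cf_info', 'id', '1']),
    #     ('row1', ['row1', 'cf_info', 'size', '3400']),
    #     ('row1', ['row1', 'cf_tag', 'desc', 'hello'])]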