From bde8352b4ba5a61935b4d7d0399c063df76951bb Mon Sep 17 00:00:00 2001 From: Chunk Date: Sun, 10 May 2015 12:25:32 +0800 Subject: [PATCH] shuffling. --- mdata/ILSVRC.py | 18 ++++++++++++------ mmodel/caffe/helper.py | 40 +++++++++++++++++++++++++++------------- test/test_data.py | 10 ++++------ 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py index 427b37d..8dd8526 100644 --- a/mdata/ILSVRC.py +++ b/mdata/ILSVRC.py @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): # if w < 300 or h < 300: # continue # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) - # img_crop = img[upper:upper + 300, left:left + 300] - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) + # img_crop = img[upper:upper + 300, left:left + 300] + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) # except Exception as e: - # print '[EXCPT]', e + # print '[EXCPT]', e # pass @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): pass - def load_data(self, mode='local', feattype='ibd', tagtype='class'): + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False): INDEX = [] X = [] Y = [] @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): for tag, feat in dict_dataset.values(): feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 - # feat = np.bitwise_and(feat, 1) + feat = np.absolute(feat) + feat = np.bitwise_and(feat, 1) X.append(feat.ravel()) Y.append(int(tag)) @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): else: raise Exception("Unknown mode!") - return X, Y + if shuffle: + # shuffling + Z = zip(X, Y) + np.random.shuffle(Z) + return Z + return X, Y diff --git a/mmodel/caffe/helper.py b/mmodel/caffe/helper.py index 9b9b368..72083e1 100644 --- a/mmodel/caffe/helper.py +++ b/mmodel/caffe/helper.py @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. in_db_data.close() -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): """ X - numpy array of data. Y - numpy array of labels. """ - print('writing image data...') - for idx in range(int(math.ceil(len(Y) / 1000.0))): - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) - with in_db_data.begin(write=True) as in_txn: - for in_idx, (in_, label_) in enumerate( - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): - # im = caffe.io.load_image(in_) - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) - - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) - in_db_data.close() + if Y != None: + print('writing image data...') + for idx in range(int(math.ceil(len(Y) / 1000.0))): + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) + with in_db_data.begin(write=True) as in_txn: + for in_idx, (in_, label_) in enumerate( + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): + # im = caffe.io.load_image(in_) + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) + + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) + in_db_data.close() + else: + assert isinstance(X[0], tuple) + print('writing image data...') + for idx in range(int(math.ceil(len(X) / 1000.0))): + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) + with in_db_data.begin(write=True) as in_txn: + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]): + # im = caffe.io.load_image(in_) + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) + + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) + in_db_data.close() if __name__ == '__main__': diff --git a/test/test_data.py b/test/test_data.py index 612ad85..d52258f 100755 --- a/test/test_data.py +++ b/test/test_data.py @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop from ..mmodel.caffe.helper import * + def test_MSR(): dmsr = MSR.DataMSR() # msrd.format() @@ -164,14 +165,11 @@ def test_caffe(): # return dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') - X, Y = dil.load_data(mode='local', feattype='coef') + X = dil.load_data(mode='local', feattype='coef', shuffle=True) print X[0] - print Y - print np.array(X).shape, np.array(Y).shape - - write_lmdb(X[2000:3000],Y[2000:3000]) - + print np.array(X).shape + write_lmdb(X[7000:]) if __name__ == '__main__': -- libgit2 0.21.2