diff --git a/mdata/ILSVRC.py b/mdata/ILSVRC.py index 89f4dec..c2b3236 100644 --- a/mdata/ILSVRC.py +++ b/mdata/ILSVRC.py @@ -457,7 +457,7 @@ class DataILSVRC(DataDumperBase): dict_dataset[hash] = (tag, im.getCoefMatrix(channel='Y')) for tag, feat in dict_dataset.values(): - X.append(feat.tolist()) + X.append(feat) Y.append(int(tag)) else: diff --git a/mmodel/theano/THEANO.py b/mmodel/theano/THEANO.py index dd51d16..26a20d4 100644 --- a/mmodel/theano/THEANO.py +++ b/mmodel/theano/THEANO.py @@ -9,6 +9,11 @@ from .theanoutil import * import numpy as np from sklearn import cross_validation +import gzip +import cPickle + +package_dir = os.path.dirname(os.path.abspath(__file__)) + class ModelTHEANO(ModelBase): def __init__(self, toolset='cnn', sc=None): @@ -17,9 +22,23 @@ class ModelTHEANO(ModelBase): self.sparker = sc self.model = None - def _train_cnn(self, X, Y, learning_rate=0.1, n_epochs=200, nkerns=[20, 50, 50], + def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'ils_crop.pkl'), + learning_rate=0.1, n_epochs=200, + nkerns=[20, 50, 50], batch_size=200): - X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0) + + if X == None: + assert dataset != None + with open(dataset, 'rb') as f: + train_set, test_set = cPickle.load(f) + + X_train, Y_train = train_set + X_test, Y_test = test_set + else: + X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0) + + print type(X), type(X_train), type(X_train[0]) + return X_train, Y_train = np.array(X_train), np.array(Y_train) X_test, Y_test = np.array(X_test), np.array(Y_test) diff --git a/test/test_model.py b/test/test_model.py index 2152a52..3c99447 100755 --- a/test/test_model.py +++ b/test/test_model.py @@ -7,8 +7,12 @@ from ..mdata import CV, ILSVRC, ILSVRC_S from ..mmodel.svm import SVM from ..mmodel.theano import THEANO -timer = Timer() +import gzip +import cPickle + +timer = Timer() +package_dir = os.path.dirname(os.path.abspath(__file__)) def test_SVM_CV(): timer.mark() @@ -151,10 +155,13 @@ def test_THEANO_crop(): dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil') X, Y = dilc.load_data(mode='local', feattype='coef') timer.report() + X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0) + with open(os.path.join(package_dir,'../res/','ils_crop.pkl'),'wb') as f: + cPickle.dump([(X_train,Y_train),(X_test,Y_test)], f) timer.mark() mtheano = THEANO.ModelTHEANO(toolset='cnn') - mtheano.train(X,Y) + mtheano._train_cnn(dataset='../../res/ils_crop.pkl') timer.report() -- libgit2 0.21.2