tristanz / ScytheMCMC (http://bytebucket.org/tristanz/scythemcmc/wiki/html/index.html)

A Scythe Markov Chain Monte Carlo C++ Framework

Clone this repository (size: 1.0 MB): HTTPS / SSH
$ hg clone http://bitbucket.org/tristanz/scythemcmc
commit 13: 9844e0828091
parent 12: fd1ece924824
branch: default
tags: tip
Updates. Switched to Lecuyer and enable seed saving.
Tristan Zajonc / tristanz
14 months ago

Changed (Δ2.2 KB):

raw changeset »

ScytheBugs.txt (15 lines added, 1 lines removed)

src/mcmc.h (122 lines added, 50 lines removed)

Up to file-list ScytheBugs.txt:

@@ -12,8 +12,23 @@ Bugs:
12
12
  Compiling the help using the latest doxygen seems to correct the problem, 
13
13
  although macros don't get documented really.
14
14
15
- Docs for scythe::min refer to max.
16
17
- What is linesearch? Docs are not very clear.
18
19
- Reading in from file is fragile.  outsheet in stata on OSX does not
20
  use the proper newline characters.  newlines across dos/unix/osx differ.
21
22
- col_interchange doesn't sound like it is what it is in help file.  It swaps
23
	rows rather than reordering.
24
25
- qnorm1 has return type "double double sd double" in docs.
26
15
27
Missing / Potential Features:
16
28
29
- rtnorm is parameterized using the variance whereas rnorm is parameterized
30
  with sd.  Possibility of errors.
31
17
32
- Fast versions of functions proportional to log density.  In most MCMC
18
33
  applications sum(log(scythe::dnorm)), for instance, takes up a significant
19
34
  amount of time.  However we usually just need the log of something
@@ -64,4 +79,3 @@ Missing / Potential Features:
64
79
  etc. Then add a -DHAS_MKL flag. See:
65
80
	http://www.intel.com/software/products/mkl/docs/WebHelp/mkl.htm
66
81
	
67

Up to file-list src/mcmc.h:

88
88
#include <iostream>
89
89
#include <fstream>
90
90
#include <scythestat/rng/mersenne.h>
91
#include <scythestat/rng/lecuyer.h>
91
92
#include <scythestat/distributions.h>
92
93
#include <scythestat/matrix.h>
93
94
#include <scythestat/rng.h>
94
95
#include <scythestat/smath.h>
95
96
#include <scythestat/stat.h>
97
#include <scythestat/ide.h>
98
#include <scythestat/defs.h>
96
99
#include <stdio.h>
97
100
#include <stdlib.h>
98
101
#include <sys/stat.h>
99
102
#include "Simple/SimpleOpt.h"
100
103
#include "Simple/SimpleIni.h"
104
// #include <mkl_vml_functions.h>
105
// #include <mkl_vml_defines.h>
106
107
#include "scythe_extensions.h"
101
108
102
109
/// Scythe MCMC version.
103
110
#define SCYTHE_MCMC_VERSION "0.1"
@@ -118,9 +125,9 @@ typedef pair<int, int> ipair;
118
125
double dInf = std::numeric_limits<double>::infinity();
119
126
double iInf = std::numeric_limits<int>::infinity();
120
127
121
/// Global Mersenne-Twister random number generator.
122
mersenne myrng;
123
128
/// Global L'Ecuyer random number generator.
129
lecuyer myrng;
130
//mersenne myrng;
124
131
125
132
/*! \brief Basic MCMC options.
126
133
 *
@@ -137,7 +144,7 @@ struct MCMCOptions {
137
144
  /// Chains.
138
145
  int chains;
139
146
  /// Seed for random number generator.
140
  int random_seed;
147
  unsigned long random_seed [6];
141
148
  /// Config file (.ini style).
142
149
  string config_file;
143
150
  /// Out file to save parameter chains.
@@ -189,6 +196,18 @@ inline string to_csv (const T& t) {
189
196
  return s;
190
197
}
191
198
199
/*! \brief Convert any streaming type to string
200
 *
201
 */
202
template <class T>
203
inline string to_string (const T& t) {
204
  std::stringstream ss;
205
  ss << t;
206
  string s = ss.str();
207
  return s;
208
}
209
210
192
211
/*! \brief Shows command line usage.
193
212
194
213
   Diplays:
@@ -202,11 +221,11 @@ inline string to_csv (const T& t) {
202
221
       --chains=arg                 number of chains
203
222
       --sample-size=arg            retained sample size
204
223
       --burnin=arg                 burn in period
205
       --thin=arg                   thinning iterval
206
       --random-seed=arg            random number seed
224
       --thin=arg                   thinning iterval (1 = no thinning)
225
       --random-seed=arg            random number seed (0 uses current timestamp)
207
226
208
     Example: ./base_model --config-file=base_model.cfg --out-file=out.txt --chains=1 --sample-size=1000
209
              --burnin=0 --thin=1 --random-seed=12345
227
     Example: ./command --config-file=command.ini --out-file=out.txt --chains=1 --sample-size=1000
228
              --burnin=0 --thin=1 --random-seed=0
210
229
</pre>
211
230
*/
212
231
void ShowUsage() {
@@ -217,11 +236,11 @@ void ShowUsage() {
217
236
       << "    --chains=arg \t\t number of chains" << endl
218
237
       << "    --sample-size=arg \t\t retained sample size" << endl
219
238
       << "    --burnin=arg \t\t burn in period" << endl
220
       << "    --thin=arg \t\t\t thinning iterval (0 = no thinning)" << endl
221
       << "    --random-seed=arg \t\t random number seed" << endl << endl;
239
       << "    --thin=arg \t\t\t thinning iterval (1 = no thinning)" << endl
240
       << "    --random-seed=arg \t\t random number seed (0 uses current timestamp, 1 attempts to load from lecuyer.seed file)" << endl << endl;
222
241
223
  cout << "  Example: ./base_model --config-file=base_model.cfg --out-file=out.txt --chains=1 --sample-size=1000" << endl
224
       << "\t   --burnin=0 --thin=1 --random-seed=12345" << endl << endl;
242
  cout << "  Example: ./command --config-file=command.ini --out-file=out.txt --chains=1 --sample-size=1000" << endl
243
       << "\t   --burnin=0 --thin=1 --random-seed=0" << endl << endl;
225
244
}
226
245
227
246
/*! \brief Parses command line options.
@@ -285,7 +304,36 @@ void DefaultStartUp(int argc, char* argv
285
304
        break;
286
305
      case OPT_RANDOM_SEED:
287
306
        setflags++;
288
        mcmc_options.random_seed = atoi(args.OptionArg());
307
        unsigned long seed = atoi(args.OptionArg());
308
				cout << "TEST" << seed<<endl;
309
				if(seed == 0) {
310
					seed = time(NULL);
311
					for(int i = 0; i < 6; ++i) {
312
						mcmc_options.random_seed[i] = seed;
313
					}
314
				}
315
				else if (seed == 1) {
316
					// try to load seed from file
317
					cout << "Try loading file..." << endl;
318
					if (FileExists("lecuyer.seed")) {
319
						Matrix<unsigned long, Col> seedmat("lecuyer.seed");
320
						for(int i = 0; i < 6; ++i) {
321
							mcmc_options.random_seed[i] = seedmat(i);
322
						}
323
					}
324
					else {
325
						cout << "No seed exists..." << endl;
326
						seed = time(NULL);
327
						for(int i = 0; i < 6; ++i) {
328
							mcmc_options.random_seed[i] = seed;
329
						}
330
					}
331
				}
332
				else {
333
					for(int i = 0; i < 6; ++i) {
334
						mcmc_options.random_seed[i] = seed;
335
					}
336
				}
289
337
        break;
290
338
      }
291
339
    } else {
@@ -309,8 +357,8 @@ void DefaultStartUp(int argc, char* argv
309
357
    cout << "Burn in cannot be negative.\n";
310
358
    exit(1);
311
359
  }
312
  if (mcmc_options.thin < 0) {
313
    cout << "Thinning interval cannot be negative.\n";
360
  if (mcmc_options.thin < 1) {
361
    cout << "Thinning interval must be positive.\n";
314
362
    exit(1);
315
363
  }
316
364
  if (!FileExists(mcmc_options.config_file)) {
@@ -319,8 +367,9 @@ void DefaultStartUp(int argc, char* argv
319
367
  }
320
368
321
369
  // Seed random number generator.
322
  myrng.initialize(mcmc_options.random_seed);
323
370
  //myrng.initialize(mcmc_options.random_seed);
371
	myrng.SetSeed(mcmc_options.random_seed);
372
	
324
373
  // Print mcmc options.
325
374
  cout << "Arguments:" << endl;
326
375
  cout << "  Config file: " << mcmc_options.config_file << endl
@@ -328,7 +377,13 @@ void DefaultStartUp(int argc, char* argv
328
377
       << "  Desired sample size: " << mcmc_options.sample_size << endl
329
378
       << "  Burn in period: " << mcmc_options.burnin << endl
330
379
       << "  Thinning interval: " << mcmc_options.thin << endl
331
       << "  Random number seed: " << mcmc_options.random_seed << endl << endl;
380
       << "  Random number seed: "
381
 						<< mcmc_options.random_seed[0] << " "
382
 						<< mcmc_options.random_seed[1] << " "
383
						<< mcmc_options.random_seed[2] << " "
384
						<< mcmc_options.random_seed[3] << " "
385
						<< mcmc_options.random_seed[4] << " "
386
						<< mcmc_options.random_seed[5] << endl << endl;
332
387
}
333
388
334
389
@@ -455,7 +510,7 @@ public:
455
510
456
511
  /// Save a new value of the parameter.
457
512
  /// \param new_value New value to save.
458
  virtual void Save(double new_value) {};
513
  virtual void Save(ParameterStorageType new_value) {};
459
514
460
515
  /// Parameter is tracked / saved.
461
516
  /// \return True if parameter is tracked.
@@ -505,7 +560,7 @@ public:
505
560
   *
506
561
   * Executes the main step function, such as taking a MH step or deterministic step.
507
562
   */
508
  virtual void DoStep() {}
563
   virtual void DoStep() {}
509
564
  /*! \brief Set starting value.
510
565
   *
511
566
   *  Calls the parameter's Parameter::Starting function and then saves the result using Parameter::Save.
@@ -562,7 +617,7 @@ public:
562
617
   *  Draws from the parameter's Parameter::RandomPosterior function and then saves the result using Parameter::Save.
563
618
   */
564
619
  void DoStep() {
565
    parameter_.Save(parameter_.RandomPosterior());
620
		parameter_.Save(parameter_.RandomPosterior());
566
621
  }
567
622
568
623
  void Start() {
@@ -614,12 +669,9 @@ public:
614
669
615
670
  /*! \brief Set function step starting value.
616
671
   *
617
   * \note Because the starting value uses the normal function calculation, it is important to add
618
   * parameters in an order that makes the function feasible.  StartingValue functions are called in the order
619
   * they are added.  Any implemention of a StartingValue method for a FunctionStep will not be used.
620
672
   */
621
673
  void Start() {
622
    DoStep();
674
		parameter_.StartingValue();
623
675
  }
624
676
625
677
  string ParameterLabel() {
@@ -815,7 +867,7 @@ public:
815
867
   *
816
868
   *  Calculates the value of Parameter::Function and then saves the result using Parameter::Save.
817
869
   */
818
  void DoStep() {
870
   void DoStep() {
819
871
    // Draw a new parameter
820
872
    double new_value = proposal_.Draw(parameter_.Value());
821
873
    double old_value = parameter_.Value();
@@ -878,14 +930,16 @@ public:
878
930
   *  \param lower Lower bound on the support of the distribution (default = -Infinity)
879
931
   *  \param upper Upper bound on the support of the distribution (default = Infinity)
880
932
   */
881
  SliceStep(ParameterType parameter, double w = 1.0, double lower = -dInf, double upper = dInf) :
882
      parameter_(parameter), w_(w), lower_(lower), upper_(upper) {}
933
  SliceStep(ParameterType parameter, double w, double lower, double upper) :
934
      parameter_(parameter), w_(w), lower_(lower), upper_(upper) {
935
		step_count_ = 0;
936
	}
883
937
884
938
  /*! \brief Take a slice sampling step for the parameter.
885
939
   *
886
940
   */
887
  void DoStep() {
888
    // Find the log density at the initial point.
941
   void DoStep() {
942
	  // Find the log density at the initial point.
889
943
    double x0 = parameter_.Value();
890
944
    double gx0 = parameter_.LogDensity(x0);
891
945
@@ -945,7 +999,10 @@ public:
945
999
        L = x1;
946
1000
      }
947
1001
    }
948
1002
		
1003
		// Adapt
1004
		//step_count_++;
1005
		//w_ = (step_count_/(step_count_+1)) * w_ + (1/(step_count_+1))*(R - L);
949
1006
    // Save the point sampled
950
1007
    parameter_.Save(x1);
951
1008
  }
@@ -972,6 +1029,7 @@ private:
972
1029
  double w_;
973
1030
  double lower_;
974
1031
  double upper_;
1032
	int step_count_;
975
1033
};
976
1034
977
1035
/*! \brief MCMC sampler.
@@ -1015,23 +1073,25 @@ public:
1015
1073
   * \param number_of_iterations Number of iterations to run sampler.
1016
1074
   * \param progress Display a progress bar.
1017
1075
   */
1018
  void Iterate(int number_of_iterations, bool progress = false) {
1019
    if (progress) {
1020
      boost::progress_display show_progress(number_of_iterations);
1021
      for (int iter = 0; iter < number_of_iterations; ++iter) {
1022
        for (int i = 0; i < steps_.size(); ++i) {
1023
          steps_[i].DoStep();
1024
        }
1025
        ++show_progress;
1026
      }
1027
    } else {
1028
      for (int iter = 0; iter < number_of_iterations; ++iter) {
1029
        for (int i = 0; i < steps_.size(); ++i) {
1030
          steps_[i].DoStep();
1031
        }
1032
      }
1033
    }
1034
  }
1076
	 void Iterate(int number_of_iterations, bool progress = false) {
1077
	   if(progress) {
1078
	     boost::progress_display show_progress(number_of_iterations);
1079
	     for(int iter = 0; iter < number_of_iterations; ++iter) {
1080
	       for(int i = 0; i < steps_.size(); ++i) {
1081
	         steps_[i].DoStep();
1082
	       }
1083
	       ++show_progress;
1084
	     }
1085
	   }
1086
	   else {
1087
	     for(int iter = 0; iter < number_of_iterations; ++iter) {
1088
	       for(int i = 0; i < steps_.size(); ++i) {
1089
	         steps_[i].DoStep();
1090
	       }
1091
	     }
1092
	   }
1093
 	}
1094
1035
1095
  /*! \brief Run sampler.
1036
1096
   *
1037
1097
   * Run the MCMC sampling procedure, including burning in, sampling, thinning, displaying progress,
@@ -1075,7 +1135,7 @@ public:
1075
1135
    cout << endl << "Sampling..." << endl;
1076
1136
    boost::progress_display show_progress(sample_size_);
1077
1137
    for (int i = 0; i < sample_size_; ++i) {
1078
      Iterate(thin_ + 1);
1138
      Iterate(thin_);
1079
1139
      // Save data
1080
1140
      for (int k = 0; k < tracks_.size(); ++k) {
1081
1141
        if (k == tracks_.size() - 1) {
@@ -1089,7 +1149,18 @@ public:
1089
1149
    }
1090
1150
1091
1151
    outfile.close();
1092
1152
		
1153
		// Save lecuyer seed for future use
1154
		Matrix<unsigned long, Col> seedmat(6, 1, false);
1155
		unsigned long seed[6];
1156
		myrng.GetState(seed);
1157
		cout << "Saving seed: " << endl << seed[0] << " " << seed[1] << " " << seed[2] 
1158
			<< " " << seed[3] << " " << seed[4] << " " << seed[5] << endl;
1159
		for(int i = 0; i < 6; ++i) {
1160
			seedmat(i) = seed[i];
1161
		}
1162
		seedmat.save("lecuyer.seed", 'o');
1163
		
1093
1164
    cout << "Total elapsed time: " << timer.elapsed() << " seconds" << endl;
1094
1165
  }
1095
1166
  /*! \brief Number of steps in one sampler iteration.
@@ -1142,3 +1213,4 @@ double fast_log_dnorm(const double x, co
1142
1213
}
1143
1214
1144
1215
1216