tristanz / ScytheMCMC (http://bytebucket.org/tristanz/scythemcmc/wiki/html/index.html)
A Scythe Markov Chain Monte Carlo C++ Framework
Clone this repository (size: 1.0 MB): HTTPS / SSH
$ hg clone http://bitbucket.org/tristanz/scythemcmc
| commit 13: | 9844e0828091 |
| parent 12: | fd1ece924824 |
| branch: | default |
| tags: | tip |
Updates. Switched to Lecuyer and enable seed saving.
Changed (Δ2.2 KB):
raw changeset »
ScytheBugs.txt (15 lines added, 1 lines removed)
src/mcmc.h (122 lines added, 50 lines removed)
Up to file-list ScytheBugs.txt:
| … | … | @@ -12,8 +12,23 @@ Bugs: |
12 |
12 |
Compiling the help using the latest doxygen seems to correct the problem, |
13 |
13 |
although macros don't get documented really. |
14 |
14 |
|
15 |
- Docs for scythe::min refer to max. |
|
16 |
||
17 |
- What is linesearch? Docs are not very clear. |
|
18 |
||
19 |
- Reading in from file is fragile. outsheet in stata on OSX does not |
|
20 |
use the proper newline characters. newlines across dos/unix/osx differ. |
|
21 |
||
22 |
- col_interchange doesn't sound like it is what it is in help file. It swaps |
|
23 |
rows rather than reordering. |
|
24 |
||
25 |
- qnorm1 has return type "double double sd double" in docs. |
|
26 |
||
15 |
27 |
Missing / Potential Features: |
16 |
28 |
|
29 |
- rtnorm is parameterized using the variance whereas rnorm is parameterized |
|
30 |
with sd. Possibility of errors. |
|
31 |
||
17 |
32 |
- Fast versions of functions proportional to log density. In most MCMC |
18 |
33 |
applications sum(log(scythe::dnorm)), for instance, takes up a significant |
19 |
34 |
amount of time. However we usually just need the log of something |
| … | … | @@ -64,4 +79,3 @@ Missing / Potential Features: |
64 |
79 |
etc. Then add a -DHAS_MKL flag. See: |
65 |
80 |
http://www.intel.com/software/products/mkl/docs/WebHelp/mkl.htm |
66 |
81 |
|
67 |
88 |
88 |
#include <iostream> |
89 |
89 |
#include <fstream> |
90 |
90 |
#include <scythestat/rng/mersenne.h> |
91 |
#include <scythestat/rng/lecuyer.h> |
|
91 |
92 |
#include <scythestat/distributions.h> |
92 |
93 |
#include <scythestat/matrix.h> |
93 |
94 |
#include <scythestat/rng.h> |
94 |
95 |
#include <scythestat/smath.h> |
95 |
96 |
#include <scythestat/stat.h> |
97 |
#include <scythestat/ide.h> |
|
98 |
#include <scythestat/defs.h> |
|
96 |
99 |
#include <stdio.h> |
97 |
100 |
#include <stdlib.h> |
98 |
101 |
#include <sys/stat.h> |
99 |
102 |
#include "Simple/SimpleOpt.h" |
100 |
103 |
#include "Simple/SimpleIni.h" |
104 |
// #include <mkl_vml_functions.h> |
|
105 |
// #include <mkl_vml_defines.h> |
|
106 |
||
107 |
#include "scythe_extensions.h" |
|
101 |
108 |
|
102 |
109 |
/// Scythe MCMC version. |
103 |
110 |
#define SCYTHE_MCMC_VERSION "0.1" |
| … | … | @@ -118,9 +125,9 @@ typedef pair<int, int> ipair; |
118 |
125 |
double dInf = std::numeric_limits<double>::infinity(); |
119 |
126 |
double iInf = std::numeric_limits<int>::infinity(); |
120 |
127 |
|
121 |
/// Global Mersenne-Twister random number generator. |
|
122 |
mersenne myrng; |
|
123 |
||
128 |
/// Global L'Ecuyer random number generator. |
|
129 |
lecuyer myrng; |
|
130 |
//mersenne myrng; |
|
124 |
131 |
|
125 |
132 |
/*! \brief Basic MCMC options. |
126 |
133 |
* |
| … | … | @@ -137,7 +144,7 @@ struct MCMCOptions { |
137 |
144 |
/// Chains. |
138 |
145 |
int chains; |
139 |
146 |
/// Seed for random number generator. |
140 |
|
|
147 |
unsigned long random_seed [6]; |
|
141 |
148 |
/// Config file (.ini style). |
142 |
149 |
string config_file; |
143 |
150 |
/// Out file to save parameter chains. |
| … | … | @@ -189,6 +196,18 @@ inline string to_csv (const T& t) { |
189 |
196 |
return s; |
190 |
197 |
} |
191 |
198 |
|
199 |
/*! \brief Convert any streaming type to string |
|
200 |
* |
|
201 |
*/ |
|
202 |
template <class T> |
|
203 |
inline string to_string (const T& t) { |
|
204 |
std::stringstream ss; |
|
205 |
ss << t; |
|
206 |
string s = ss.str(); |
|
207 |
return s; |
|
208 |
} |
|
209 |
||
210 |
||
192 |
211 |
/*! \brief Shows command line usage. |
193 |
212 |
|
194 |
213 |
Diplays: |
| … | … | @@ -202,11 +221,11 @@ inline string to_csv (const T& t) { |
202 |
221 |
--chains=arg number of chains |
203 |
222 |
--sample-size=arg retained sample size |
204 |
223 |
--burnin=arg burn in period |
205 |
--thin=arg thinning iterval |
|
206 |
--random-seed=arg random number seed |
|
224 |
--thin=arg thinning iterval (1 = no thinning) |
|
225 |
--random-seed=arg random number seed (0 uses current timestamp) |
|
207 |
226 |
|
208 |
Example: ./base_model --config-file=base_model.cfg --out-file=out.txt --chains=1 --sample-size=1000 |
|
209 |
--burnin=0 --thin=1 --random-seed=12345 |
|
227 |
Example: ./command --config-file=command.ini --out-file=out.txt --chains=1 --sample-size=1000 |
|
228 |
--burnin=0 --thin=1 --random-seed=0 |
|
210 |
229 |
</pre> |
211 |
230 |
*/ |
212 |
231 |
void ShowUsage() { |
| … | … | @@ -217,11 +236,11 @@ void ShowUsage() { |
217 |
236 |
<< " --chains=arg \t\t number of chains" << endl |
218 |
237 |
<< " --sample-size=arg \t\t retained sample size" << endl |
219 |
238 |
<< " --burnin=arg \t\t burn in period" << endl |
220 |
<< " --thin=arg \t\t\t thinning iterval (0 = no thinning)" << endl |
|
221 |
<< " --random-seed=arg \t\t random number seed" << endl << endl; |
|
239 |
<< " --thin=arg \t\t\t thinning iterval (1 = no thinning)" << endl |
|
240 |
<< " --random-seed=arg \t\t random number seed (0 uses current timestamp, 1 attempts to load from lecuyer.seed file)" << endl << endl; |
|
222 |
241 |
|
223 |
cout << " Example: ./base_model --config-file=base_model.cfg --out-file=out.txt --chains=1 --sample-size=1000" << endl |
|
224 |
<< "\t --burnin=0 --thin=1 --random-seed=12345" << endl << endl; |
|
242 |
cout << " Example: ./command --config-file=command.ini --out-file=out.txt --chains=1 --sample-size=1000" << endl |
|
243 |
<< "\t --burnin=0 --thin=1 --random-seed=0" << endl << endl; |
|
225 |
244 |
} |
226 |
245 |
|
227 |
246 |
/*! \brief Parses command line options. |
| … | … | @@ -285,7 +304,36 @@ void DefaultStartUp(int argc, char* argv |
285 |
304 |
break; |
286 |
305 |
case OPT_RANDOM_SEED: |
287 |
306 |
setflags++; |
288 |
|
|
307 |
unsigned long seed = atoi(args.OptionArg()); |
|
308 |
cout << "TEST" << seed<<endl; |
|
309 |
if(seed == 0) { |
|
310 |
seed = time(NULL); |
|
311 |
for(int i = 0; i < 6; ++i) { |
|
312 |
mcmc_options.random_seed[i] = seed; |
|
313 |
} |
|
314 |
} |
|
315 |
else if (seed == 1) { |
|
316 |
// try to load seed from file |
|
317 |
cout << "Try loading file..." << endl; |
|
318 |
if (FileExists("lecuyer.seed")) { |
|
319 |
Matrix<unsigned long, Col> seedmat("lecuyer.seed"); |
|
320 |
for(int i = 0; i < 6; ++i) { |
|
321 |
mcmc_options.random_seed[i] = seedmat(i); |
|
322 |
} |
|
323 |
} |
|
324 |
else { |
|
325 |
cout << "No seed exists..." << endl; |
|
326 |
seed = time(NULL); |
|
327 |
for(int i = 0; i < 6; ++i) { |
|
328 |
mcmc_options.random_seed[i] = seed; |
|
329 |
} |
|
330 |
} |
|
331 |
} |
|
332 |
else { |
|
333 |
for(int i = 0; i < 6; ++i) { |
|
334 |
mcmc_options.random_seed[i] = seed; |
|
335 |
} |
|
336 |
} |
|
289 |
337 |
break; |
290 |
338 |
} |
291 |
339 |
} else { |
| … | … | @@ -309,8 +357,8 @@ void DefaultStartUp(int argc, char* argv |
309 |
357 |
cout << "Burn in cannot be negative.\n"; |
310 |
358 |
exit(1); |
311 |
359 |
} |
312 |
if (mcmc_options.thin < 0) { |
|
313 |
cout << "Thinning interval cannot be negative.\n"; |
|
360 |
if (mcmc_options.thin < 1) { |
|
361 |
cout << "Thinning interval must be positive.\n"; |
|
314 |
362 |
exit(1); |
315 |
363 |
} |
316 |
364 |
if (!FileExists(mcmc_options.config_file)) { |
| … | … | @@ -319,8 +367,9 @@ void DefaultStartUp(int argc, char* argv |
319 |
367 |
} |
320 |
368 |
|
321 |
369 |
// Seed random number generator. |
322 |
myrng.initialize(mcmc_options.random_seed); |
|
323 |
||
370 |
//myrng.initialize(mcmc_options.random_seed); |
|
371 |
myrng.SetSeed(mcmc_options.random_seed); |
|
372 |
||
324 |
373 |
// Print mcmc options. |
325 |
374 |
cout << "Arguments:" << endl; |
326 |
375 |
cout << " Config file: " << mcmc_options.config_file << endl |
| … | … | @@ -328,7 +377,13 @@ void DefaultStartUp(int argc, char* argv |
328 |
377 |
<< " Desired sample size: " << mcmc_options.sample_size << endl |
329 |
378 |
<< " Burn in period: " << mcmc_options.burnin << endl |
330 |
379 |
<< " Thinning interval: " << mcmc_options.thin << endl |
331 |
<< " Random number seed: " |
|
380 |
<< " Random number seed: " |
|
381 |
<< mcmc_options.random_seed[0] << " " |
|
382 |
<< mcmc_options.random_seed[1] << " " |
|
383 |
<< mcmc_options.random_seed[2] << " " |
|
384 |
<< mcmc_options.random_seed[3] << " " |
|
385 |
<< mcmc_options.random_seed[4] << " " |
|
386 |
<< mcmc_options.random_seed[5] << endl << endl; |
|
332 |
387 |
} |
333 |
388 |
|
334 |
389 |
|
| … | … | @@ -455,7 +510,7 @@ public: |
455 |
510 |
|
456 |
511 |
/// Save a new value of the parameter. |
457 |
512 |
/// \param new_value New value to save. |
458 |
virtual void Save( |
|
513 |
virtual void Save(ParameterStorageType new_value) {}; |
|
459 |
514 |
|
460 |
515 |
/// Parameter is tracked / saved. |
461 |
516 |
/// \return True if parameter is tracked. |
| … | … | @@ -505,7 +560,7 @@ public: |
505 |
560 |
* |
506 |
561 |
* Executes the main step function, such as taking a MH step or deterministic step. |
507 |
562 |
*/ |
508 |
|
|
563 |
virtual void DoStep() {} |
|
509 |
564 |
/*! \brief Set starting value. |
510 |
565 |
* |
511 |
566 |
* Calls the parameter's Parameter::Starting function and then saves the result using Parameter::Save. |
| … | … | @@ -562,7 +617,7 @@ public: |
562 |
617 |
* Draws from the parameter's Parameter::RandomPosterior function and then saves the result using Parameter::Save. |
563 |
618 |
*/ |
564 |
619 |
void DoStep() { |
565 |
|
|
620 |
parameter_.Save(parameter_.RandomPosterior()); |
|
566 |
621 |
} |
567 |
622 |
|
568 |
623 |
void Start() { |
| … | … | @@ -614,12 +669,9 @@ public: |
614 |
669 |
|
615 |
670 |
/*! \brief Set function step starting value. |
616 |
671 |
* |
617 |
* \note Because the starting value uses the normal function calculation, it is important to add |
|
618 |
* parameters in an order that makes the function feasible. StartingValue functions are called in the order |
|
619 |
* they are added. Any implemention of a StartingValue method for a FunctionStep will not be used. |
|
620 |
672 |
*/ |
621 |
673 |
void Start() { |
622 |
|
|
674 |
parameter_.StartingValue(); |
|
623 |
675 |
} |
624 |
676 |
|
625 |
677 |
string ParameterLabel() { |
| … | … | @@ -815,7 +867,7 @@ public: |
815 |
867 |
* |
816 |
868 |
* Calculates the value of Parameter::Function and then saves the result using Parameter::Save. |
817 |
869 |
*/ |
818 |
|
|
870 |
void DoStep() { |
|
819 |
871 |
// Draw a new parameter |
820 |
872 |
double new_value = proposal_.Draw(parameter_.Value()); |
821 |
873 |
double old_value = parameter_.Value(); |
| … | … | @@ -878,14 +930,16 @@ public: |
878 |
930 |
* \param lower Lower bound on the support of the distribution (default = -Infinity) |
879 |
931 |
* \param upper Upper bound on the support of the distribution (default = Infinity) |
880 |
932 |
*/ |
881 |
SliceStep(ParameterType parameter, double w = 1.0, double lower = -dInf, double upper = dInf) : |
|
882 |
parameter_(parameter), w_(w), lower_(lower), upper_(upper) {} |
|
933 |
SliceStep(ParameterType parameter, double w, double lower, double upper) : |
|
934 |
parameter_(parameter), w_(w), lower_(lower), upper_(upper) { |
|
935 |
step_count_ = 0; |
|
936 |
} |
|
883 |
937 |
|
884 |
938 |
/*! \brief Take a slice sampling step for the parameter. |
885 |
939 |
* |
886 |
940 |
*/ |
887 |
void DoStep() { |
|
888 |
// Find the log density at the initial point. |
|
941 |
void DoStep() { |
|
942 |
// Find the log density at the initial point. |
|
889 |
943 |
double x0 = parameter_.Value(); |
890 |
944 |
double gx0 = parameter_.LogDensity(x0); |
891 |
945 |
|
| … | … | @@ -945,7 +999,10 @@ public: |
945 |
999 |
L = x1; |
946 |
1000 |
} |
947 |
1001 |
} |
948 |
||
1002 |
||
1003 |
// Adapt |
|
1004 |
//step_count_++; |
|
1005 |
//w_ = (step_count_/(step_count_+1)) * w_ + (1/(step_count_+1))*(R - L); |
|
949 |
1006 |
// Save the point sampled |
950 |
1007 |
parameter_.Save(x1); |
951 |
1008 |
} |
| … | … | @@ -972,6 +1029,7 @@ private: |
972 |
1029 |
double w_; |
973 |
1030 |
double lower_; |
974 |
1031 |
double upper_; |
1032 |
int step_count_; |
|
975 |
1033 |
}; |
976 |
1034 |
|
977 |
1035 |
/*! \brief MCMC sampler. |
| … | … | @@ -1015,23 +1073,25 @@ public: |
1015 |
1073 |
* \param number_of_iterations Number of iterations to run sampler. |
1016 |
1074 |
* \param progress Display a progress bar. |
1017 |
1075 |
*/ |
1018 |
void Iterate(int number_of_iterations, bool progress = false) { |
|
1019 |
if (progress) { |
|
1020 |
boost::progress_display show_progress(number_of_iterations); |
|
1021 |
for (int iter = 0; iter < number_of_iterations; ++iter) { |
|
1022 |
for (int i = 0; i < steps_.size(); ++i) { |
|
1023 |
steps_[i].DoStep(); |
|
1024 |
} |
|
1025 |
++show_progress; |
|
1026 |
} |
|
1027 |
} else { |
|
1028 |
for (int iter = 0; iter < number_of_iterations; ++iter) { |
|
1029 |
for (int i = 0; i < steps_.size(); ++i) { |
|
1030 |
steps_[i].DoStep(); |
|
1031 |
} |
|
1032 |
} |
|
1033 |
} |
|
1034 |
} |
|
1076 |
void Iterate(int number_of_iterations, bool progress = false) { |
|
1077 |
if(progress) { |
|
1078 |
boost::progress_display show_progress(number_of_iterations); |
|
1079 |
for(int iter = 0; iter < number_of_iterations; ++iter) { |
|
1080 |
for(int i = 0; i < steps_.size(); ++i) { |
|
1081 |
steps_[i].DoStep(); |
|
1082 |
} |
|
1083 |
++show_progress; |
|
1084 |
} |
|
1085 |
} |
|
1086 |
else { |
|
1087 |
for(int iter = 0; iter < number_of_iterations; ++iter) { |
|
1088 |
for(int i = 0; i < steps_.size(); ++i) { |
|
1089 |
steps_[i].DoStep(); |
|
1090 |
} |
|
1091 |
} |
|
1092 |
} |
|
1093 |
} |
|
1094 |
||
1035 |
1095 |
/*! \brief Run sampler. |
1036 |
1096 |
* |
1037 |
1097 |
* Run the MCMC sampling procedure, including burning in, sampling, thinning, displaying progress, |
| … | … | @@ -1075,7 +1135,7 @@ public: |
1075 |
1135 |
cout << endl << "Sampling..." << endl; |
1076 |
1136 |
boost::progress_display show_progress(sample_size_); |
1077 |
1137 |
for (int i = 0; i < sample_size_; ++i) { |
1078 |
Iterate(thin_ |
|
1138 |
Iterate(thin_); |
|
1079 |
1139 |
// Save data |
1080 |
1140 |
for (int k = 0; k < tracks_.size(); ++k) { |
1081 |
1141 |
if (k == tracks_.size() - 1) { |
| … | … | @@ -1089,7 +1149,18 @@ public: |
1089 |
1149 |
} |
1090 |
1150 |
|
1091 |
1151 |
outfile.close(); |
1092 |
||
1152 |
||
1153 |
// Save lecuyer seed for future use |
|
1154 |
Matrix<unsigned long, Col> seedmat(6, 1, false); |
|
1155 |
unsigned long seed[6]; |
|
1156 |
myrng.GetState(seed); |
|
1157 |
cout << "Saving seed: " << endl << seed[0] << " " << seed[1] << " " << seed[2] |
|
1158 |
<< " " << seed[3] << " " << seed[4] << " " << seed[5] << endl; |
|
1159 |
for(int i = 0; i < 6; ++i) { |
|
1160 |
seedmat(i) = seed[i]; |
|
1161 |
} |
|
1162 |
seedmat.save("lecuyer.seed", 'o'); |
|
1163 |
||
1093 |
1164 |
cout << "Total elapsed time: " << timer.elapsed() << " seconds" << endl; |
1094 |
1165 |
} |
1095 |
1166 |
/*! \brief Number of steps in one sampler iteration. |
| … | … | @@ -1142,3 +1213,4 @@ double fast_log_dnorm(const double x, co |
1142 |
1213 |
} |
1143 |
1214 |
|
1144 |
1215 |
|
1216 |
