【问题标题】:OxMetrics - Conditional logit modelOxMetrics - 条件 logit 模型
【发布时间】:2016-05-15 19:27:47
【问题描述】:

我正在尝试为多项(或条件)logit 模型开发 OxMetrics 代码(如http://data.princeton.edu/wws509/notes/c6s2.html 中所述)。 我是 OxMetrics 的新手,仍然难以理解优化例程 (MaxBFGS) 的工作原理。

非常欢迎任何帮助!

// FUNCTION FOR MULTINOMIAL LOGIT MODELLING
//X = Independent variables (e.g. product features)
//Y = Dependent variable (i.e. discrete choices (0/1))
//N = Total number of individuals (N=500)
//T = Number of observations (choice tasks) per respondent (T=20)
//J = Number of products per task (J=2 => choice between two options)
//sv => starting values
//llik => model log-likelihood

LOGIT(const sv, const llik, X, Y, N, T, J)
{
decl num, den, prbj, prbi, maty;
num = reshape(exp(X*sv[0:6]), N*T, J);
den = sumr(num);
prbj = num ./ den;
maty = reshape(Y .== 1, N*T, J);
prbi = sumr(prbj .* maty);
llik[0] = -sumc(log(prbi));
return llik[0];
}

main()
{
decl data, N, T, J, X, Y, sv, dFunc, fit;
data = loadmat("C:/Users/.../Data/data.csv");
X = data[][33] ~ data[][5:10];
Y = data[][12];
N = 500;
T = 20;
J = 2;
sv = <0;0;0;0;0;0;0>;
println ("\nEstimating using MaxBFGS");
fit = MaxBFGS(LOGIT, X=X, Y=Y, N=N, T=T, J=J, &sv, &dFunc, 0, TRUE);
println (MaxConvergenceMsg(fit), " at parameters ", sv', "with function value ", double(dFunc));
}

【问题讨论】:

    标签: estimation mlogit ox


    【解决方案1】:

    非常感谢,您的回答非常有帮助(不知何故,我的大脑很难从 R 切换到 Ox) 不确定这是否是为混合 logit(或随机参数 logit 或内核密度)发布 Ox 的正确位置,但它可能对其他人有用。当然,非常欢迎改进以下代码的想法/想法/技巧!

    声明数据集的格式

    decl N = 433;     // Number of respondents
    decl R = 100;     // Number of draws
    decl T = 8;  // Number of tasks per respondent (!!! same for all respondents)
    decl J = 2;   // Number of options per task (!!! same for all tasks)
    decl Y = <0>;      // choice variable
    decl X = <1;2;3;4;5;6>; // 6 param, including both fixed effects + mean of random effects
    decl RC = <2;3;4;5;6>; // 5 param corresponding to SD of random effects
    decl H, mY, mX, mRC;
    

    计算混合逻辑概率的函数

    fn_MXL(const beta, const obj, const grad, const hess) 
    {
    decl i, seqprb, num, den, sllik;
    seqprb = zeros(N,R);
    for(i = 0; i < R; ++i)
    {
    num = reshape(exp(mX * beta[0:5][] + mRC .* (H[N*T*J*i:(N*T*J*(i+1))-1][]) * beta[6:][]), N*T, J); // !!! Edit "beta" accordingly
    den = sumr(num);
    seqprb[][i] = prodr(reshape(sumr(num ./ den .* mY), N, T));
    }
    sllik = sumc(log(meanr(seqprb)));
    obj[0] = double(sllik);
    return 1;
    }
    

    模型编码/估计

    main()
    {
    decl data, i, id1, id2, Indiv, prime, Halton, sv, ir, obj;
    // 1. Select variables
    data = loadmat("C:/Users/.../choice_data.xls");
    mY = reshape(data[][Y], N*T, J);
    mX = data[][X];
    mRC = data[][RC];
    delete data;
    // 2. Generate halton draws
    id1 = id2 = zeros(N,R); // Create ID variables for both respondents and draws to re-order the draws
    for(i = 0; i < N; ++i)
    {
    id1[i][] = range(1,R); // ID for draws
    id2[i][] = i + 1; // ID for respondents
    }
    Indiv = reshape(id1, N*R, 1) ~ reshape(id2, N*R, 1);
    Halton = zeros(N*R, sizeof(RC));
    prime = <2;3;5;7;11;13;17;19;23;29;31;37;41;43;47;53;59;61;67;71;73;79;83;89;97;101;103;107;109;113>; // List of prime numbers
    for(i = 0; i < sizeof(RC); ++i)
    {
    Halton[][i] = haltonsequence(prime[i], N*R);    // prime number ; number of draws
    }
    Halton = Indiv ~ Halton;
    H = zeros(rows(Halton)*T*J,columns(Halton));     // Duplicate draws (T*J) times to match structure of "mX" (!!! possible to run out of memory)
    for(i = 0; i < (T*J); ++i)
    {
    H[rows(Halton)*i:(rows(Halton)*(i+1)-1)][] = Halton;
    }
    H = sortbyc(H, <0;1>)[][2:]; // Draws are organised as following: Draw > Indiv (e.g. first 5 rows would correspond to 1st draw for first 5 indiv)
    H = quann(H); // Transform halton draws into normal draws
    // 3. Estimation
    sv = <0;0;0;0;0;0;0;0;0;0;0>;      // Starting values for X and RC
    MaxControl(-1, 1, 1);
    ir = MaxBFGS(fn_MXL, &sv, &obj, 0, TRUE);
    print("\n", MaxConvergenceMsg(ir), " using numerical derivatives", "\nFunction value = ", obj, "; parameters:", sv);
    }
    

    【讨论】:

    • 不错的实现,感谢分享。改进它的一个小想法:您可以将代码封装在一个类中以避免全局变量,如probit3.ox 所示。此外,您应该采用特定的命名约定 (en.wikipedia.org/wiki/Naming_convention_%28programming%29) 示例:由于变量 Halton 是一个矩阵,您可以使用 mHalton 其中m 代表矩阵。 Idem 使用 iN 而不是 N 其中 i 代表整数,v 代表向量,d 代表双精度...。它使代码更具可读性。
    【解决方案2】:

    您应该阅读官方 MaxBFGS 帮助(在功能上按“F1”)或here

    这个函数被声明为:

    MaxBFGS(const func, const avP, const adFunc, const amInvHess, const fNumDer);
    
    • 参数func:函数最大化
    • 参数avP:输入--> 起始值矩阵,输出-> 最终系数。因此,您应该首先将所有需要优化的参数存储在单个矩阵中(您的 sv 变量)。
    • 参数amInvHess:设置为0以进行简单估计。
    • 参数fNumDer:设置为0:用于解析一阶导数,设置为1以使用数值一阶导数。

    需要优化的函数必须具有以下原型:

    LOGIT(const vP, const adFunc, const avScore, const amHessian)
    

    作为一个简单的介绍,您可以查看以下示例 (OxMetrics7\ox\samples\maximize\probit1.ox) 以通过 MaxBFGS 估计概率模型 - 它记录在官方文档“Introduction to Ox -Jurgen A. Doornik 和Marius Ooms -2006").

    // file : ...\OxMetrics7\ox\samples\maximize\probit1.ox
    #include <oxstd.oxh>
    #import <maximize>
    
    decl g_mY;                                  // global data
    decl g_mX;                                  // global data
    
    
    ///////////////////////////////////////////////////////////////////
    // Possible improvements:
    // 1) Use log(tailn(g_mX * vP)) instead of log(1-prob)
    //    in fProbit. This is slower, but a bit more accurate.
    // 2) Use analytical first derivatives. This is a bit more robust.
    //    Add numerical computation of standard errors.
    // 3) Use analytical second derivatives and Newton instead
    //    of Quasi-Newton (BFGS). Because the log-likelihood is concave,
    //    we don't really need the robustness of BFGS.
    // 4) Encapsulate in a class to avoid global variables.
    // 5) General starting value routine, etc. etc.
    //
    // probit2.ox implements (2)
    // probit3.ox implements (4)
    // probit4.ox implements (4) in a more general way
    ///////////////////////////////////////////////////////////////////
    
    fProbit(const vP, const adFunc, const avScore,
        const amHessian)
    {
        decl prob = probn(g_mX * vP);   // vP is column vector
    
        adFunc[0] = double(
            meanc(g_mY .* log(prob) + (1-g_mY) .* log(1-prob)));
    
    return 1;                           // 1 indicates success
    }
    
    main()
    {
        decl vp, dfunc, ir;
    
        print("Probit example 1, run on ", date(), ".\n\n");
    
        decl mx = loadmat("data/finney.in7");
    
        g_mY = mx[][0];       // dependent variable: 0,1 dummy
        g_mX = 1 ~ mx[][3:4]; // regressors: 1, Lrate, Lvolume
        delete mx;
    
        vp = <-0.465; 0.842; 1.439>;        // starting values
    
        MaxControl(-1, 1, 1);          // print each iteration
                                                   // maximize
        ir = MaxBFGS(fProbit, &vp, &dfunc, 0, TRUE);
    
        print("\n", MaxConvergenceMsg(ir),
            " using numerical derivatives",
            "\nFunction value = ", dfunc * rows(g_mY),
            "; parameters:", vp);
    }
    

    Ps:您可以为您的X,Y,N,T,J.. 变量使用全局变量,然后改进您的代码(参见 probit2.ox、probit3.ox...)

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2015-09-25
      • 1970-01-01
      • 2022-01-24
      • 2019-01-21
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多