اطلاعیه

**Mohammad Sadegh Rostami** · 2014-11-23T08:27:31

پاسخ : کمک در مورد این برنامه

سلام
برنامه شما به زبان متلب نوشته نشده!
لینک زیر رو ببین:
لینک
ابتداش توضیح داده که کدهاش به چه زبونیه
و چه طوری باید اجرا بشه
خوشبختانه در انتهاش گفته که برخی از کدهای متلب مثالها هم موجوده
و خودش لینک زیر رو معرفی کرده:
لینک
...
:read:

**javad mollania** · 2014-11-23T14:33:04

پاسخ : کمک در مورد این برنامه

سلام
خیلی ممنون از اینکه پاسخ دادید
اگه میشه این برنامه رو یه توضیحی بدید
مرسی

کد:

function [V] = iter_poly_gw_not_inplace()
% ITER_POLY_GW_NOT_INPLACE - Iterative policy evaluation of the state-value
% function for the 4x4 grid world example under a uniform random policy.
%
% Iterates the Bellman expectation equation
%
%   V(s) <- \sum_a \pi(s,a) \sum_{s'} P_{s,s'}^a (R_{s,s'}^a + \gamma V(s'))
%
% until the largest change of any state value in one sweep is at most
% CONV_TOL, or MAX_N_ITERS sweeps have been performed.
%
% Iterations are NOT performed in place: every sweep reads only the previous
% estimate (Vp) and writes a separate array (Vc); Vc is copied into Vp once
% the sweep is finished.
%
% Model implemented by this code:
%   * states are the cells (ii,jj) of a sideL-by-sideL grid;
%   * cells (1,1) and (sideL,sideL) are terminal, are never updated and so
%     keep the value 0;
%   * four actions {up,down,right,left}, each chosen with probability 0.25;
%   * an action that would leave the grid leaves the state unchanged;
%   * every transition, including one into a terminal cell, has reward -1.
%
% Returns:
%   V - sideL-by-sideL matrix with the estimated state values.
%
% See ePage 253 in the Sutton book.
%
% Written by:
% --
% John L. Weatherwax        2007-12-03
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

%gamma = 0.9;
gamma = 1;  % <- treat this as an undiscounted task

sideL = 4;

% Arrays holding the previous (Vp) and current (Vc) estimates of the
% state-value function (the terminal entries are place holders, never updated):
Vp = zeros(sideL);
Vc = zeros(sideL);

% some parameters for convergence:
MAX_N_ITERS = 1000; iterCnt = 0;
CONV_TOL    = 1e-4; delta   = 1e10;

% a uniform policy:
pol_pi = 0.25;

% reward received on every transition:
stepReward = -1;

% (row,col) offsets of the actions, in the order {up,down,right,left}.
% The order is kept fixed so the floating point summation order is stable.
moves = [ -1  0 ;
          +1  0 ;
           0 +1 ;
           0 -1 ];
nMoves = size(moves,1);

% set to true to print delta and the rounded value table after every sweep:
verbose = false;

% strict '<' so that at most MAX_N_ITERS sweeps are performed
while( (delta > CONV_TOL) && (iterCnt < MAX_N_ITERS) )
  delta = 0;
  % update states in the order one indexes matrices
  for ii=1:sideL
    for jj=1:sideL
      % terminal states are skipped
      if( (ii==1 && jj==1) || (ii==sideL && jj==sideL) ), continue; end

      v_tmp = 0.0;
      for aa=1:nMoves
        % clamping the target indices to the grid implements "a move off
        % the grid does not change our position"
        ni = min( max( ii+moves(aa,1), 1 ), sideL );
        nj = min( max( jj+moves(aa,2), 1 ), sideL );
        v_tmp = v_tmp + pol_pi*( stepReward + gamma*Vp(ni,nj) );
      end

      Vc(ii,jj) = v_tmp;

      % track the largest change of any state value in this sweep
      delta = max( delta, abs( Vp(ii,jj)-v_tmp ) );
    end % jj loop
  end % ii loop

  % overwrite previous with current:
  Vp = Vc;

  iterCnt = iterCnt+1;
  if( verbose )
    fprintf( 'iterCnt (k)=%5d; delta=%10.5f\n', iterCnt, delta );
    disp( round(Vc*10)/10 ); % <- just display ONE decimal
  end
end % while loop

if( delta > CONV_TOL )
  warning( 'iter_poly_gw_not_inplace:noConvergence', ...
           'No convergence after %d iterations (delta=%g).', iterCnt, delta );
end

V = Vc;

اطلاعیه

کمک در مورد این برنامه

کمک در مورد این برنامه

دیدگاه

دیدگاه