# موضوع: کمک در مورد اين برنامه

1. ## کمک در مورد اين برنامه

سلام

من تو اين برنامه که يک برنامه gridworld است مشکل دارم، چون وقتي تو متلب کپي ميکنم از همون خط اول خطا ميگيره. خواهشاً يه کمکي برسونيد. (مثلاً تو متلب ميگه که defvar رو نميشناسه.)
کد:
;; Global state shared by the gridworld routines below.
;; V: one-dimensional array of state values indexed by state number (created in SETUP).
(defvar V)
;; VV: two-dimensional array indexed as (aref VV y x); COMPUTE-V / COMPUTE-V* copy V into it for printing.
(defvar VV)
;; rows / columns: grid dimensions, assigned in SETUP.
(defvar rows)
(defvar columns)
;; states: number of states, i.e. the length of V (assigned in SETUP).
(defvar states)
;; AA / BB: special states; FULL-BACKUP gives reward +10 from AA and +5 from BB.
(defvar AA)
(defvar BB)
;; AAprime / BBprime: the states FULL-BACKUP uses as successors of AA and BB respectively.
(defvar AAprime)
(defvar BBprime)
;; gamma: discount factor multiplied with the successor state's value in FULL-BACKUP.
(defvar gamma 0.9)

(defun setup ()
  "Initialise the gridworld globals: a 5x5 grid, the special states AA/BB and
their successors AAprime/BBprime, the value vector V (all 0.0) and the display
matrix VV.  Returns the freshly created VV array."
  (setq rows 5
        columns 5)
  ;; Derive the state count from the grid size instead of repeating the literal 25,
  ;; so the three values cannot drift apart.
  (setq states (* rows columns))
  ;; STATE-FROM-XY reads COLUMNS, so these must come after the assignment above.
  (setq AA (state-from-xy 1 0)
        BB (state-from-xy 3 0)
        AAprime (state-from-xy 1 4)
        BBprime (state-from-xy 3 2))
  (setq V (make-array states :initial-element 0.0))
  ;; Without :initial-element the contents of VV are undefined until one of the
  ;; compute functions fills it; give it well-defined contents so (sfa VV) is
  ;; safe to call at any time.
  (setq VV (make-array (list rows columns) :initial-element 0.0)))

(defun compute-V ()
  "Repeatedly sweep over all states, replacing (aref V x) in place with the MEAN
of the four FULL-BACKUP values for actions 0..3, until the summed absolute change
of one sweep drops below 0.000001.  Then copy V into the 2-D array VV and print
it with SFA.

NOTE(review): MEAN is not defined in the visible source; it is assumed to return
the arithmetic mean of a list of numbers -- confirm it is loaded before use."
  ;; The comparison operator had been HTML-escaped (\"&lt;\"), which the Lisp
  ;; reader treats as a symbol followed by a comment; restored to a real `<'.
  (loop for delta = (loop for x below states
                          for old-V = (aref V x)
                          do (setf (aref V x)
                                   (mean (loop for a below 4 collect
                                               (full-backup x a))))
                          sum (abs (- old-V (aref V x))))
        until (< delta 0.000001))
  ;; XY-FROM-STATE yields (x y); VV is indexed row-first, hence (aref VV y x).
  (loop for state below states do
    (multiple-value-bind (x y) (xy-from-state state)
      (setf (aref VV y x) (aref V state))))
  (sfa VV))

(defun compute-V* ()
  "Repeatedly sweep over all states, replacing (aref V x) in place with the
maximum of the four FULL-BACKUP values for actions 0..3, until the summed
absolute change of one sweep drops below 0.000001.  Then copy V into the 2-D
array VV and print it with SFA."
  ;; The comparison operator had been HTML-escaped (\"&lt;\"), which the Lisp
  ;; reader treats as a symbol followed by a comment; restored to a real `<'.
  (loop for delta = (loop for x below states
                          for old-V = (aref V x)
                          do (setf (aref V x)
                                   (loop for a below 4 maximize
                                         (full-backup x a)))
                          sum (abs (- old-V (aref V x))))
        until (< delta 0.000001))
  ;; XY-FROM-STATE yields (x y); VV is indexed row-first, hence (aref VV y x).
  (loop for state below states do
    (multiple-value-bind (x y) (xy-from-state state)
      (setf (aref VV y x) (aref V state))))
  (sfa VV))

(defun sfa (array)
  "Show Floating-Point Array.
Print ARRAY to standard output with every element formatted as ~5,1F (width 5,
one decimal).  A rank-1 array is printed on a single line; any other array is
treated as two-dimensional and printed one row per line, each row preceded by a
newline.  Returns NIL."
  ;; All string delimiters had been HTML-escaped (&quot;), leaving no valid
  ;; string literals; they are restored to real double quotes here.
  (cond ((= 1 (array-rank array))
         (loop for e across array do (format t "~5,1F" e)))
        (t (loop for i below (array-dimension array 0) do
             (format t "~%")
             (loop for j below (array-dimension array 1) do
               (format t "~5,1F" (aref array i j)))))))

(defun full-backup (x a)
  "Return the one-step backed-up value of taking action A in state X:
reward + gamma * V[successor].
  - from AA: reward +10, successor AAprime
  - from BB: reward +5,  successor BBprime
  - if the move would leave the grid: reward -1, state unchanged
  - otherwise: reward 0, successor given by NEXT-STATE."
  (multiple-value-bind (reward successor)
      ;; Clause order matters: the special states take precedence over the
      ;; off-grid check.
      (cond ((= x AA)       (values +10 AAprime))
            ((= x BB)       (values +5 BBprime))
            ((off-grid x a) (values -1 x))
            (t              (values 0 (next-state x a))))
    (+ reward (* gamma (aref V successor)))))

(defun off-grid (state a)
  "Return true when taking action A from STATE would step outside the grid.
Actions: 0 = y+1, 1 = x+1, 2 = y-1, 3 = x-1.  The y coordinate is bounded by
ROWS and the x coordinate by COLUMNS.  Returns NIL for any other value of A."
  ;; The comparison operators had been HTML-escaped (&gt;= and &lt;); they are
  ;; restored here.  The candidate coordinate is computed with 1+/1- rather than
  ;; by mutating the local bindings, which was never needed.
  (multiple-value-bind (x y) (xy-from-state state)
    (case a
      (0 (>= (1+ y) rows))
      (1 (>= (1+ x) columns))
      (2 (< (1- y) 0))
      (3 (< (1- x) 0)))))

(defun next-state (state a)
  "Return the state number reached from STATE by action A, without any bounds
checking.  Actions: 0 = y+1, 1 = x+1, 2 = y-1, 3 = x-1; any other A leaves the
coordinates unchanged."
  (multiple-value-bind (x y) (xy-from-state state)
    ;; Express the move as a per-axis displacement instead of mutating x / y.
    (let ((dx (case a (1 1) (3 -1) (t 0)))
          (dy (case a (0 1) (2 -1) (t 0))))
      (state-from-xy (+ x dx) (+ y dy)))))

(defun state-from-xy (x y)
  "Map grid coordinates (X, Y) to a state number: Y + X * COLUMNS.
Inverse of XY-FROM-STATE."
  (let ((column-offset (* x columns)))
    (+ y column-offset)))

(defun xy-from-state (state)
  "Split a state number into its grid coordinates, returned as two values
X and Y: the quotient and remainder of STATE truncated by COLUMNS.
Inverse of STATE-FROM-XY."
  (multiple-value-bind (x y) (truncate state columns)
    (values x y)))
2. ## پاسخ : کمک در مورد اين برنامه

سلام
برنامه شما به زبان متلب نوشته نشده!
لينک زير رو ببين:

ابتداش توضيح داده که کدهاش به چه زبونيه
و چه طوري بايد اجرا بشه
خوشبختانه در انتهاش گفته که برخي از کدهاي متلب مثالها هم موجوده
و خودش لينک زير رو معرفي کرده:

...
:read:
3. ## پاسخ : کمک در مورد اين برنامه

سلام
خيلي ممنون از اينکه پاسخ داديد
اگه ميشه اين برنامه رو يه توضيحي بديد
مرسي
کد:
function [V] = iter_poly_gw_not_inplace()
% ITER_POLY_GW_NOT_INPLACE - Performs iterative policy evaluation on the
% state-value function for the grid world example.
%
% Iterate Bellman equation:
%
%   V(s) <- \sum_a \pi(s,a) \sum_{s'} P_{s,s'}^a (R_{s,s'}^a + \gamma V(s'))
%
% Iterations are not performed in place (i.e. we have two arrays and copy
% between them): every sweep reads only from Vp (previous) and writes to
% Vc (current).
%
% The policy is uniform random over the four moves {up,down,right,left}.
% Every move earns a reward of -1; a move that would leave the grid keeps
% the agent where it is.  Cells (1,1) and (sideL,sideL) are terminal and
% keep the value 0.
%
% Output:
%   V - sideL-by-sideL matrix holding the estimated state values.
%
% See ePage 253 in the Sutton book.
%
% Written by:
% --
% John L. Weatherwax        2007-12-03
%
% email: wax@alum.mit.edu
%
% Please send comments and especially bug reports to the
% above email address.
%
%-----

%gamma = 0.9;
gamma = 1;  % <- treat this as an undiscounted task

sideL = 4;

% Arrays to hold the values of the state-value function
% (the terminal cells (1,1) and (sideL,sideL) are place holders, never updated):
Vp = zeros(sideL);
Vc = zeros(sideL);

% some parameters for convergence:
%
MAX_N_ITERS = 1000; iterCnt = 0;
CONV_TOL    = 1e-4; delta   = 1e10;

% a uniform policy:
pol_pi = 0.25;

% set to true to print every sweep:
verbose = false;

% NOTE: strict '<' so that at most MAX_N_ITERS sweeps are performed
% (with '<=' one extra sweep was allowed).
while( (delta > CONV_TOL) && (iterCnt < MAX_N_ITERS) )
  delta = 0;
  % update states in the order one indexes matrices
  % states (1,1) and (sideL,sideL) are terminal states
  for ii=1:sideL
    for jj=1:sideL
      if( (ii==1 && jj==1) || (ii==sideL && jj==sideL) ) continue; end

      v = Vp(ii,jj);

      % Index reached by each move; clamping to the grid reproduces the
      % "this action does not change our position" case on the borders.
      % Stepping into a terminal cell is treated like any other move
      % (reward -1, terminal value 0), so no special case is needed.
      iiUp    = max(ii-1,1);
      iiDown  = min(ii+1,sideL);
      jjRight = min(jj+1,sideL);
      jjLeft  = max(jj-1,1);

      % accumulate in the fixed order UP, DOWN, RIGHT, LEFT:
      v_tmp = 0.0;
      v_tmp = v_tmp + pol_pi*( -1 + gamma*Vp(iiUp,jj) );     % action = UP
      v_tmp = v_tmp + pol_pi*( -1 + gamma*Vp(iiDown,jj) );   % action = DOWN
      v_tmp = v_tmp + pol_pi*( -1 + gamma*Vp(ii,jjRight) );  % action = RIGHT
      v_tmp = v_tmp + pol_pi*( -1 + gamma*Vp(ii,jjLeft) );   % action = LEFT

      % update Vc(ii,jj):
      Vc(ii,jj) = v_tmp;

      delta = max( [ delta, abs( v-Vc(ii,jj) ) ] );
    end % jj loop
  end % ii loop
  % overwrite previous with current:
  Vp = Vc;

  iterCnt = iterCnt+1;
  % lets print the iterations if desired:
  if( verbose )
    fprintf( 'iterCnt (k)=%5d; delta=%10.5f\n', iterCnt, delta );
    disp( round(Vc*10)/10 ); % <- just display ONE decimal
  end
end % while loop

V = Vc;

