Python code error in IDLE?
Hello. While studying coding, I used the code from http://solarisailab.com/archives/486.
Here is the code:
"""
TensorFlow translation of the torch example found here (written by SeanNaren).
https://github.com/SeanNaren/TorchQLearningExample
Original keras example found here (written by Eder Santana).
https://gist.github.com/EderSantana/c7222daa328f0e885093#file-qlearn-py-L164
The agent plays a game of catch. Fruits drop from the sky and the agent can choose the actions
left/stay/right to catch the fruit before it reaches the ground.
"""
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import random
import math
import os
# Parameters
epsilon = 1 # The probability of choosing a random action (in training). This decays as iterations increase. (0 to 1)
epsilonMinimumValue = 0.001 # The minimum value we want epsilon to reach in training. (0 to 1)
nbActions = 3 # The number of actions. Since we only have left/stay/right that means 3 actions.
epoch = 1001 # The number of games we want the system to run for.
hiddenSize = 100 # Number of neurons in the hidden layers.
maxMemory = 500 # How large should the memory be (where it stores its past experiences).
batchSize = 50 # The mini-batch size for training. Samples are randomly taken from memory till mini-batch size.
gridSize = 10 # The size of the grid that the agent is going to play the game on.
nbStates = gridSize * gridSize # We eventually flatten to a 1d tensor to feed the network.
discount = 0.9 # The discount is used to force the network to choose states that lead to the reward quicker (0 to 1)
learningRate = 0.2 # Learning Rate for Stochastic Gradient Descent (our optimizer).
# Create the base model.
X = tf.placeholder(tf.float32, [None, nbStates])
W1 = tf.Variable(tf.truncated_normal([nbStates, hiddenSize], stddev=1.0 / math.sqrt(float(nbStates))))
b1 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
input_layer = tf.nn.relu(tf.matmul(X, W1) + b1)
W2 = tf.Variable(tf.truncated_normal([hiddenSize, hiddenSize],stddev=1.0 / math.sqrt(float(hiddenSize))))
b2 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
hidden_layer = tf.nn.relu(tf.matmul(input_layer, W2) + b2)
W3 = tf.Variable(tf.truncated_normal([hiddenSize, nbActions],stddev=1.0 / math.sqrt(float(hiddenSize))))
b3 = tf.Variable(tf.truncated_normal([nbActions], stddev=0.01))
output_layer = tf.matmul(hidden_layer, W3) + b3
# True labels
Y = tf.placeholder(tf.float32, [None, nbActions])
# Mean squared error cost function
cost = tf.reduce_sum(tf.square(Y - output_layer)) / (2*batchSize)
# Stochastic Gradient Descent Optimizer
optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(cost)
# Helper function: Chooses a random value between the two boundaries.
def randf(s, e):
    return (float(random.randrange(0, (e - s) * 9999)) / 10000) + s
# The environment: Handles interactions and contains the state of the environment
class CatchEnvironment():
    def __init__(self, gridSize):
        self.gridSize = gridSize
        self.nbStates = self.gridSize * self.gridSize
        self.state = np.empty(3, dtype = np.uint8)

    # Returns the state of the environment.
    def observe(self):
        canvas = self.drawState()
        canvas = np.reshape(canvas, (-1, self.nbStates))
        return canvas

    def drawState(self):
        canvas = np.zeros((self.gridSize, self.gridSize))
        canvas[self.state[0]-1, self.state[1]-1] = 1  # Draw the fruit.
        # Draw the basket. The basket takes the adjacent two places to the position of basket.
        canvas[self.gridSize-1, self.state[2] - 1 - 1] = 1
        canvas[self.gridSize-1, self.state[2] - 1] = 1
        canvas[self.gridSize-1, self.state[2] - 1 + 1] = 1
        return canvas

    # Resets the environment. Randomly initialise the fruit position (always at the top to begin with) and bucket.
    def reset(self):
        initialFruitColumn = random.randrange(1, self.gridSize + 1)
        initialBucketPosition = random.randrange(2, self.gridSize + 1 - 1)
        self.state = np.array([1, initialFruitColumn, initialBucketPosition])
        return self.getState()

    def getState(self):
        stateInfo = self.state
        fruit_row = stateInfo[0]
        fruit_col = stateInfo[1]
        basket = stateInfo[2]
        return fruit_row, fruit_col, basket

    # Returns the reward that the agent has gained for being in the current environment state.
    def getReward(self):
        fruitRow, fruitColumn, basket = self.getState()
        if (fruitRow == self.gridSize - 1):  # If the fruit has reached the bottom.
            if (abs(fruitColumn - basket) <= 1):  # Check if the basket caught the fruit.
                return 1
            else:
                return -1
        else:
            return 0

    def isGameOver(self):
        if (self.state[0] == self.gridSize - 1):
            return True
        else:
            return False

    def updateState(self, action):
        if (action == 1):
            action = -1
        elif (action == 2):
            action = 0
        else:
            action = 1
        fruitRow, fruitColumn, basket = self.getState()
        newBasket = min(max(2, basket + action), self.gridSize - 1)  # The min/max prevents the basket from moving out of the grid.
        fruitRow = fruitRow + 1  # The fruit is falling by 1 every action.
        self.state = np.array([fruitRow, fruitColumn, newBasket])

    # Action can be 1 (move left), 2 (stay) or 3 (move right).
    def act(self, action):
        self.updateState(action)
        reward = self.getReward()
        gameOver = self.isGameOver()
        return self.observe(), reward, gameOver, self.getState()  # For purpose of the visual, I also return the state.
# The memory: Handles the internal memory that we add experiences that occur based on agent's actions,
# and creates batches of experiences based on the mini-batch size for training.
class ReplayMemory:
    def __init__(self, gridSize, maxMemory, discount):
        self.maxMemory = maxMemory
        self.gridSize = gridSize
        self.nbStates = self.gridSize * self.gridSize
        self.discount = discount
        canvas = np.zeros((self.gridSize, self.gridSize))
        canvas = np.reshape(canvas, (-1, self.nbStates))
        self.inputState = np.empty((self.maxMemory, 100), dtype = np.float32)
        self.actions = np.zeros(self.maxMemory, dtype = np.uint8)
        self.nextState = np.empty((self.maxMemory, 100), dtype = np.float32)
        self.gameOver = np.empty(self.maxMemory, dtype = np.bool)
        self.rewards = np.empty(self.maxMemory, dtype = np.int8)
        self.count = 0
        self.current = 0

    # Appends the experience to the memory.
    def remember(self, currentState, action, reward, nextState, gameOver):
        self.actions[self.current] = action
        self.rewards[self.current] = reward
        self.inputState[self.current, ...] = currentState
        self.nextState[self.current, ...] = nextState
        self.gameOver[self.current] = gameOver
        self.count = max(self.count, self.current + 1)
        self.current = (self.current + 1) % self.maxMemory

    def getBatch(self, model, batchSize, nbActions, nbStates, sess, X):
        # We check to see if we have enough memory inputs to make an entire batch, if not we create the biggest
        # batch we can (at the beginning of training we will not have enough experience to fill a batch).
        memoryLength = self.count
        chosenBatchSize = min(batchSize, memoryLength)
        inputs = np.zeros((chosenBatchSize, nbStates))
        targets = np.zeros((chosenBatchSize, nbActions))
        # Fill the inputs and targets up.
        for i in xrange(chosenBatchSize):
            if memoryLength == 1:
                memoryLength = 2
            # Choose a random memory experience to add to the batch.
            randomIndex = random.randrange(1, memoryLength)
            current_inputState = np.reshape(self.inputState[randomIndex], (1, 100))
            target = sess.run(model, feed_dict={X: current_inputState})
            current_nextState = np.reshape(self.nextState[randomIndex], (1, 100))
            current_outputs = sess.run(model, feed_dict={X: current_nextState})
            # Gives us Q_sa, the max q for the next state.
            nextStateMaxQ = np.amax(current_outputs)
            if (self.gameOver[randomIndex] == True):
                target[0, [self.actions[randomIndex]-1]] = self.rewards[randomIndex]
            else:
                # reward + discount(gamma) * max_a' Q(s',a')
                # We are setting the Q-value for the action to r + gamma*max a' Q(s', a'). The rest stay the same
                # to give an error of 0 for those outputs.
                target[0, [self.actions[randomIndex]-1]] = self.rewards[randomIndex] + self.discount * nextStateMaxQ
            # Update the inputs and targets.
            inputs[i] = current_inputState
            targets[i] = target
        return inputs, targets
def main(_):
    print("Training new model")
    # Define Environment
    env = CatchEnvironment(gridSize)
    # Define Replay Memory
    memory = ReplayMemory(gridSize, maxMemory, discount)
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    winCount = 0
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for i in xrange(epoch):
            # Initialize the environment.
            err = 0
            env.reset()
            isGameOver = False
            # The initial state of the environment.
            currentState = env.observe()
            while (isGameOver != True):
                action = -9999  # action initialization
                # Decides if we should choose a random action, or an action from the policy network.
                global epsilon
                if (randf(0, 1) <= epsilon):
                    action = random.randrange(1, nbActions+1)
                else:
                    # Forward the current state through the network.
                    q = sess.run(output_layer, feed_dict={X: currentState})
                    # Find the max index (the chosen action).
                    index = q.argmax()
                    action = index + 1
                # Decay the epsilon by multiplying by 0.999, not allowing it to go below a certain threshold.
                if (epsilon > epsilonMinimumValue):
                    epsilon = epsilon * 0.999
                nextState, reward, gameOver, stateInfo = env.act(action)
                if (reward == 1):
                    winCount = winCount + 1
                memory.remember(currentState, action, reward, nextState, gameOver)
                # Update the current state and if the game is over.
                currentState = nextState
                isGameOver = gameOver
                # We get a batch of training data to train the model.
                inputs, targets = memory.getBatch(output_layer, batchSize, nbActions, nbStates, sess, X)
                # Train the network, which returns the error.
                _, loss = sess.run([optimizer, cost], feed_dict={X: inputs, Y: targets})
                err = err + loss
            print("Epoch " + str(i) + ": err = " + str(err) + ": Win count = " + str(winCount) + " Win ratio = " + str(float(winCount)/float(i+1)*100))
        # Save the variables to disk.
        save_path = saver.save(sess, os.getcwd()+"/model.ckpt")
        print("Model saved in file: %s" % save_path)

if __name__ == '__main__':
    tf.app.run()
"""
TensorFlow translation of the torch example found here (written by SeanNaren).
https://github.com/SeanNaren/TorchQLearningExample
Original keras example found here (written by Eder Santana).
https://gist.github.com/EderSantana/c7222daa328f0e885093#file-qlearn-py-L164
The agent plays a game of catch. Fruits drop from the sky and the agent can choose the actions
left/stay/right to catch the fruit before it reaches the ground.
"""
import tensorflow.compat.v1 as tf
tf.disablev2behavior()
import numpy as np
import random
import math
import os
# Parameters
epsilon = 1 # The probability of choosing a random action (in training). This decays as iterations increase. (0 to 1)
epsilonMinimumValue = 0.001 # The minimum value we want epsilon to reach in training. (0 to 1)
nbActions = 3 # The number of actions. Since we only have left/stay/right that means 3 actions.
epoch = 1001 # The number of games we want the system to run for.
hiddenSize = 100 # Number of neurons in the hidden layers.
maxMemory = 500 # How large should the memory be (where it stores its past experiences).
batchSize = 50 # The mini-batch size for training. Samples are randomly taken from memory till mini-batch size.
gridSize = 10 # The size of the grid that the agent is going to play the game on.
nbStates = gridSize * gridSize # We eventually flatten to a 1d tensor to feed the network.
discount = 0.9 # The discount is used to force the network to choose states that lead to the reward quicker (0 to 1)
learningRate = 0.2 # Learning Rate for Stochastic Gradient Descent (our optimizer).
# Create the base model.
X = tf.placeholder(tf.float32, [None, nbStates])
W1 = tf.Variable(tf.truncated_normal([nbStates, hiddenSize], stddev=1.0 / math.sqrt(float(nbStates))))
b1 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
input_layer = tf.nn.relu(tf.matmul(X, W1) + b1)
W2 = tf.Variable(tf.truncated_normal([hiddenSize, hiddenSize],stddev=1.0 / math.sqrt(float(hiddenSize))))
b2 = tf.Variable(tf.truncated_normal([hiddenSize], stddev=0.01))
hiddenlayer = tf.nn.relu(tf.matmul(inputlayer, W2) + b2)
W3 = tf.Variable(tf.truncated_normal([hiddenSize, nbActions],stddev=1.0 / math.sqrt(float(hiddenSize))))
b3 = tf.Variable(tf.truncated_normal([nbActions], stddev=0.01))
outputlayer = tf.matmul(hiddenlayer, W3) + b3
# True labels
Y = tf.placeholder(tf.float32, [None, nbActions])
# Mean squared error cost function
cost = tf.reducesum(tf.square(Y-outputlayer)) / (2*batchSize)
# Stochastic Gradient Decent Optimizer
optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(cost)
# Helper function: Chooses a random value between the two boundaries.
def randf(s, e):
return (float(random.randrange(0, (e - s) * 9999)) / 10000) + s;
# The environment: Handles interactions and contains the state of the environment
class CatchEnvironment():
def init(self, gridSize):
self.gridSize = gridSize
self.nbStates = self.gridSize * self.gridSize
self.state = np.empty(3, dtype = np.uint8)
# Returns the state of the environment.
def observe(self):
canvas = self.drawState()
canvas = np.reshape(canvas, (-1,self.nbStates))
return canvas
def drawState(self):
canvas = np.zeros((self.gridSize, self.gridSize))
canvas[self.state[0]-1, self.state[1]-1] = 1 # Draw the fruit.
# Draw the basket. The basket takes the adjacent two places to the position of basket.
canvas[self.gridSize-1, self.state[2] -1 - 1] = 1
canvas[self.gridSize-1, self.state[2] -1] = 1
canvas[self.gridSize-1, self.state[2] -1 + 1] = 1
return canvas
# Resets the environment. Randomly initialise the fruit position (always at the top to begin with) and bucket.
def reset(self):
initialFruitColumn = random.randrange(1, self.gridSize + 1)
initialBucketPosition = random.randrange(2, self.gridSize + 1 - 1)
self.state = np.array([1, initialFruitColumn, initialBucketPosition])
return self.getState()
def getState(self):
stateInfo = self.state
fruit_row = stateInfo[0]
fruit_col = stateInfo[1]
basket = stateInfo[2]
return fruitrow, fruitcol, basket
# Returns the award that the agent has gained for being in the current environment state.
def getReward(self):
fruitRow, fruitColumn, basket = self.getState()
if (fruitRow == self.gridSize - 1): # If the fruit has reached the bottom.
if (abs(fruitColumn - basket) <= 1): # Check if the basket caught the fruit.
return 1
else:
return -1
else:
return 0
def isGameOver(self):
if (self.state[0] == self.gridSize - 1):
return True
else:
return False
def updateState(self, action):
if (action == 1):
action = -1
elif (action == 2):
action = 0
else:
action = 1
fruitRow, fruitColumn, basket = self.getState()
newBasket = min(max(2, basket + action), self.gridSize - 1) # The min/max prevents the basket from moving out of the grid.
fruitRow = fruitRow + 1 # The fruit is falling by 1 every action.
self.state = np.array([fruitRow, fruitColumn, newBasket])
#Action can be 1 (move left) or 2 (move right)
def act(self, action):
self.updateState(action)
reward = self.getReward()
gameOver = self.isGameOver()
return self.observe(), reward, gameOver, self.getState() # For purpose of the visual, I also return the state.
# The memory: Handles the internal memory that we add experiences that occur based on agent's actions,
# and creates batches of experiences based on the mini-batch size for training.
class ReplayMemory:
def init(self, gridSize, maxMemory, discount):
self.maxMemory = maxMemory
self.gridSize = gridSize
self.nbStates = self.gridSize * self.gridSize
self.discount = discount
canvas = np.zeros((self.gridSize, self.gridSize))
canvas = np.reshape(canvas, (-1,self.nbStates))
self.inputState = np.empty((self.maxMemory, 100), dtype = np.float32)
self.actions = np.zeros(self.maxMemory, dtype = np.uint8)
self.nextState = np.empty((self.maxMemory, 100), dtype = np.float32)
self.gameOver = np.empty(self.maxMemory, dtype = np.bool)
self.rewards = np.empty(self.maxMemory, dtype = np.int8)
self.count = 0
self.current = 0
# Appends the experience to the memory.
def remember(self, currentState, action, reward, nextState, gameOver):
self.actions[self.current] = action
self.rewards[self.current] = reward
self.inputState[self.current, ...] = currentState
self.nextState[self.current, ...] = nextState
self.gameOver[self.current] = gameOver
self.count = max(self.count, self.current + 1)
self.current = (self.current + 1) % self.maxMemory
def getBatch(self, model, batchSize, nbActions, nbStates, sess, X):
# We check to see if we have enough memory inputs to make an entire batch, if not we create the biggest
# batch we can (at the beginning of training we will not have enough experience to fill a batch).
memoryLength = self.count
chosenBatchSize = min(batchSize, memoryLength)
inputs = np.zeros((chosenBatchSize, nbStates))
targets = np.zeros((chosenBatchSize, nbActions))
# Fill the inputs and targets up.
for i in xrange(chosenBatchSize):
if memoryLength == 1:
memoryLength = 2
# Choose a random memory experience to add to the batch.
randomIndex = random.randrange(1, memoryLength)
current_inputState = np.reshape(self.inputState[randomIndex], (1, 100))
target = sess.run(model, feeddict={X: currentinputState})
current_nextState = np.reshape(self.nextState[randomIndex], (1, 100))
currentoutputs = sess.run(model, feeddict={X: current_nextState})
# Gives us Q_sa, the max q for the next state.
nextStateMaxQ = np.amax(current_outputs)
if (self.gameOver[randomIndex] == True):
target[0, [self.actions[randomIndex]-1]] = self.rewards[randomIndex]
else:
# reward + discount(gamma) * max_a' Q(s',a')
# We are setting the Q-value for the action to r + gamma*max a' Q(s', a'). The rest stay the same
# to give an error of 0 for those outputs.
target[0, [self.actions[randomIndex]-1]] = self.rewards[randomIndex] + self.discount * nextStateMaxQ
# Update the inputs and targets.
inputs[i] = current_inputState
targets[i] = target
return inputs, targets
def main(_):
print("Training new model")
# Define Environment
env = CatchEnvironment(gridSize)
# Define Replay Memory
memory = ReplayMemory(gridSize, maxMemory, discount)
# Add ops to save and restore all the variables.
saver = tf.train.Saver()
winCount = 0
with tf.Session() as sess:
tf.initializeallvariables().run()
for i in xrange(epoch):
# Initialize the environment.
err = 0
env.reset()
isGameOver = False
# The initial state of the environment.
currentState = env.observe()
while (isGameOver != True):
action = -9999 # action initilization
# Decides if we should choose a random action, or an action from the policy network.
global epsilon
if (randf(0, 1) <= epsilon):
action = random.randrange(1, nbActions+1)
else:
# Forward the current state through the network.
q = sess.run(outputlayer, feeddict={X: currentState})
# Find the max index (the chosen action).
index = q.argmax()
action = index + 1
# Decay the epsilon by multiplying by 0.999, not allowing it to go below a certain threshold.
if (epsilon > epsilonMinimumValue):
epsilon = epsilon * 0.999
nextState, reward, gameOver, stateInfo = env.act(action)
if (reward == 1):
winCount = winCount + 1
memory.remember(currentState, action, reward, nextState, gameOver)
# Update the current state and if the game is over.
currentState = nextState
isGameOver = gameOver
# We get a batch of training data to train the model.
inputs, targets = memory.getBatch(output_layer, batchSize, nbActions, nbStates, sess, X)
# Train the network which returns the error이.
, loss = sess.run([optimizer, cost], feeddict={X: inputs, Y: targets})
err = err + loss
print("Epoch " + str(i) + ": err = " + str(err) + ": Win count = " + str(winCount) + " Win ratio = " + str(float(winCount)/float(i+1)*100))
# Save the variables to disk.
save_path = saver.save(sess, os.getcwd()+"/model.ckpt")
print("Model saved in file: %s" % save_path)
if name == 'main':
tf.app.run() 입니다
However, I got the following error:
WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\tensorflow_core\python\compat\v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
Training new model
WARNING:tensorflow:From C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\tensorflow_core\python\util\tf_should_use.py:198: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use tf.global_variables_initializer instead.
W0820 22:17:13.656675 9068 deprecation.py:323] From C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\tensorflow_core\python\util\tf_should_use.py:198: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use tf.global_variables_initializer instead.
Traceback (most recent call last):
  File "C:\Windows\system32\python", line 267, in <module>
    tf.app.run()
  File "C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\tensorflow_core\python\platform\app.py", line 40, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\absl\app.py", line 299, in run
    _run_main(main, args)
  File "C:\ProgramData\Anaconda3\envs\tens_2\lib\site-packages\absl\app.py", line 250, in _run_main
    sys.exit(main(argv))
  File "C:\Windows\system32\python", line 216, in main
    for i in xrange(epoch):
NameError: name 'xrange' is not defined
How can I fix this?
I know it's very long, but I would really appreciate your help ㅠㅠ
Hello,
NameError: name 'xrange' is not defined
The error above occurs when the function xrange is not defined.
xrange is a Python 2.x function that was removed in Python 3.x.
In Python 3.x, the range function behaves the same way Python 2.x's xrange did,
so you can either change the loop to for i in range(epoch): (and do the same for the xrange call in getBatch),
or create a new conda environment with Python 2.x and run the code there.
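Alternatively, if you prefer not to touch the loops, a minimal compatibility shim (my own suggestion, not part of the original code) added right after the imports lets the script run unchanged on both Python 2 and Python 3:

# Compatibility shim so the rest of the script can keep using xrange.
try:
    xrange              # On Python 2 the built-in already exists.
except NameError:
    xrange = range      # On Python 3, alias xrange to range.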
Thank you.
Hello.
Looking at the link, it is an old post, so the code is probably Python 2 based.
Either add the following line:
from past.builtins import xrange
or install Python 2 and run the code there.
Python 2's xrange became range in Python 3, and its return type changed as well.
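For reference, past.builtins comes from the third-party future package, so this route assumes that package is installed in your environment (for example via pip install future). A hedged sketch of a version-safe import:

# Assumes the third-party "future" package is installed (pip install future);
# if it is not, fall back to the built-in range, which serves the same purpose here.
try:
    from past.builtins import xrange   # Python 2-style xrange on Python 3
except ImportError:
    xrange = range                     # fallback when "future" is not available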