{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"id": "8104e2db-b1a8-40a0-a238-5d9149fd74b0",
"metadata": {},
"outputs": [],
"source": [
"from mlagents_envs.environment import UnityEnvironment\n",
"import mlagents_envs\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "6f477382-acc9-4aec-907a-7f58caf955ed",
"metadata": {},
"outputs": [],
"source": [
"import random"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "b7f60f26-0a90-4ea5-b2c2-b5683bda56a6",
"metadata": {},
"outputs": [],
"source": [
"env = UnityEnvironment()"
]
},
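{
"cell_type": "markdown",
"id": "b2f1c3a4-0000-4000-8000-0000000000a1",
"metadata": {},
"source": [
"With no `file_name`, `UnityEnvironment()` attaches to a running Unity editor on the default editor port (5004), so the cell above only returns after pressing Play. A minimal sketch of the equivalent explicit call; the `seed` value here is illustrative, not taken from this notebook:\n",
"\n",
"```python\n",
"from mlagents_envs.environment import UnityEnvironment\n",
"\n",
"# file_name=None means \"attach to a running editor\" rather than launching a built player.\n",
"env = UnityEnvironment(file_name=None, seed=1, side_channels=[])\n",
"```"
]
},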
{
"cell_type": "code",
"execution_count": 84,
"id": "5929b410-12c3-4bd9-b984-b2c29a76c3f3",
"metadata": {},
"outputs": [],
"source": [
"env.reset()"
]
},
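{
"cell_type": "markdown",
"id": "c3d2e1f0-0000-4000-8000-0000000000a2",
"metadata": {},
"source": [
"The next cell steps the environment and prints what it sees; in this file its source appears after its (very long) outputs. Here is a minimal random-action loop consistent with the printed output. This is a hedged reconstruction, not necessarily the notebook's exact code, and it assumes a single behavior with discrete actions (the three `action_mask` arrays in the output suggest three discrete branches):\n",
"\n",
"```python\n",
"behavior_name = list(env.behavior_specs)[0]\n",
"spec = env.behavior_specs[behavior_name]\n",
"\n",
"for _ in range(100):\n",
"    print(\"Step\")\n",
"    decision_steps, terminal_steps = env.get_steps(behavior_name)\n",
"    print((decision_steps, terminal_steps))\n",
"    for agent_id in decision_steps:\n",
"        print(decision_steps[agent_id])\n",
"        print(decision_steps[agent_id].reward)\n",
"    # Sample a random action for every agent that requested a decision.\n",
"    action = spec.action_spec.random_action(len(decision_steps))\n",
"    env.set_actions(behavior_name, action)\n",
"    env.step()\n",
"```"
]
},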
{
"cell_type": "code",
"execution_count": 85,
"id": "f108ff09-9f42-4405-add3-6df941c48f8b",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0b50>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107434eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0460>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 78.21462], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0b50>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0f70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 80.694435], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x10744b7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x10744b7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.29597], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0ca0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0f70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 90.925804], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0fd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0460>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.28178], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0f70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0be0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26389], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0fd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0be0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x10744b7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x10744b7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x10744b7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cad60>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x1052bbd90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0ca0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd30>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0b50>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0f70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 83.26209], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb880>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x10744b7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107434eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052bbd90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0be0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0be0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0460>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25988], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26341], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb880>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x10744b7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30755], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107434eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0460>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca100>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0460>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0ca0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0460>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 85.95022], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0190>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0f70>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb6d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.14938], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0220>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0460>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb790>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.25333], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca07f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca09a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0b50>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0e80>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0b50>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0fd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107434eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca09a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.273026], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0460>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceba30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb880>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb8e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca07f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d00>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67a90>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0220>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.27214], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c672e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67e80>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebc40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0b50>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0250>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c70>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0730>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebfd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d90>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 1. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0af0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0ca0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052ca9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb760>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 1. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebeb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 2. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052caf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 2. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 1. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 0. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. , 0. ,\n",
" 1. , 2. , 1. , 0. , 0. , 0. , 91.2745],\n",
" dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0460>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca08e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca00a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebeb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb790>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebdf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0970>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0160>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb760>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb0d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebb20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce07c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce04c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce02b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce07c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce0520>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cafd0>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.26514], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca02e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x10744b7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107434eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0ca0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca00a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0d60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce02b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca02e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67a90>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67fa0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca08e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0730>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0a30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0be0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0b20>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0f70>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cad60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce04c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ce0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0eb0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ce01f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca01f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0c40>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052caf40>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x1052cafd0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052cad60>, <mlagents_envs.base_env.TerminalSteps object at 0x1052ca100>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107c674c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0ca0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0490>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0610>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb430>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0790>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb9d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3a0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebfd0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb4f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebdf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0eb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0250>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0190>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0c40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd00d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0d00>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0610>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0d30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0580>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceba30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c674c0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb7f0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0ac0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb6d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb490>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb790>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0370>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebaf0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0d60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0940>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x1052bbd90>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb520>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb4f0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67370>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebaf0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0370>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb700>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb940>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67e80>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb8e0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67bb0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb790>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0970>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67fa0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb9d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb100>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0070>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb3d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb5b0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebee0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebf40>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb880>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0b20>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67bb0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebee0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd03d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebf40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb520>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ca0af0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0a60>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107ca0fd0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd60>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd30>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67910>, <mlagents_envs.base_env.TerminalSteps object at 0x107c672e0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb880>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3a0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0160>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67370>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebb20>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebd60>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107c67340>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd01c0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb430>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb7f0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb070>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb5b0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 1. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd00d0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0340>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0ac0>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd0580>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 2. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb100>, <mlagents_envs.base_env.TerminalSteps object at 0x107cebc40>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cebd30>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb3d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 1. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0580>, <mlagents_envs.base_env.TerminalSteps object at 0x107c67910>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 1. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107ceb700>, <mlagents_envs.base_env.TerminalSteps object at 0x107ceb0d0>)\n",
"DecisionStep(obs=[array([[0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 0. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n",
"Step\n",
"(<mlagents_envs.base_env.DecisionSteps object at 0x107cd0940>, <mlagents_envs.base_env.TerminalSteps object at 0x107cd03d0>)\n",
"DecisionStep(obs=[array([[0., 1., 0., 0., 0.],\n",
" [0., 1., 0., 0., 0.],\n",
" [0., 1., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0.]], dtype=float32), array([ 0. , 0. , 0. , 3. , 3. , 3. ,\n",
" 0. , 2. , 2. , 1. , 0. , 0. ,\n",
" 0. , 91.30669], dtype=float32)], reward=0.0, agent_id=0, action_mask=[array([False]), array([False]), array([False])], group_id=0, group_reward=0.0)\n",
"0.0\n"
]
},
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [85]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m----> 2\u001b[0m \u001b[43menv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStep\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 4\u001b[0m asd \u001b[38;5;241m=\u001b[39m env\u001b[38;5;241m.\u001b[39mget_steps(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnpc?team=0\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"File \u001b[0;32m~/opt/miniforge3/lib/python3.9/site-packages/mlagents_envs/timers.py:305\u001b[0m, in \u001b[0;36mtimed.<locals>.wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 304\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m hierarchical_timer(func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m):\n\u001b[0;32m--> 305\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/opt/miniforge3/lib/python3.9/site-packages/mlagents_envs/environment.py:350\u001b[0m, in \u001b[0;36mUnityEnvironment.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 348\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_communicator\u001b[38;5;241m.\u001b[39mexchange(step_input, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_poll_process)\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m outputs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 350\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m UnityCommunicatorStoppedException(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommunicator has exited.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_behavior_specs(outputs)\n\u001b[1;32m 352\u001b[0m rl_output \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mrl_output\n",
"\u001b[0;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited."
]
}
],
"source": [
"while True:\n",
" env.step()\n",
" print(\"Step\")\n",
" asd = env.get_steps('npc?team=0')\n",
" print(asd)\n",
" print(asd[0][0])\n",
" _id = asd[0][0].obs[0][0][0]\n",
" print(_id)\n",
" env.set_action_for_agent('npc?team=0', 0, mlagents_envs.environment.ActionTuple(discrete=np.array([[1, 0, random.randint(0,2)]])))"
]
},
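{
"cell_type": "markdown",
"id": "robust-loop-lead-in",
"metadata": {},
"source": [
"A minimal sketch of a more defensive version of the loop above (illustrative, not part of the recorded run): it assumes the same `env` and the `'npc?team=0'` behavior, samples a random action from the behavior's `ActionSpec` instead of hand-building the `ActionTuple`, and exits cleanly when the Unity side disconnects rather than surfacing `UnityCommunicatorStoppedException`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "robust-loop-sketch",
"metadata": {},
"outputs": [],
"source": [
"from mlagents_envs.exception import UnityCommunicatorStoppedException\n",
"\n",
"behavior_name = 'npc?team=0'  # same behavior queried above\n",
"try:\n",
"    while True:\n",
"        env.step()\n",
"        decision_steps, terminal_steps = env.get_steps(behavior_name)\n",
"        spec = env.behavior_specs[behavior_name]  # BehaviorSpec holding the ActionSpec\n",
"        for agent_id in decision_steps.agent_id:\n",
"            # random_action(1) returns an ActionTuple shaped for a single agent\n",
"            env.set_action_for_agent(behavior_name, int(agent_id),\n",
"                                     spec.action_spec.random_action(1))\n",
"except UnityCommunicatorStoppedException:\n",
"    # raised by env.step() once the Unity editor/build stops\n",
"    print('Unity disconnected; stopping the loop.')"
]
},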
{
"cell_type": "code",
"execution_count": 86,
"id": "db100c84-22ab-491b-b68d-4d5c1bbc66a3",
"metadata": {},
"outputs": [],
"source": [
"env.close()"
]
}
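,
{
"cell_type": "markdown",
"id": "close-pattern-lead-in",
"metadata": {},
"source": [
"`env.close()` shuts down the Unity process and the underlying communicator. For scripts, a hedged pattern (illustrative; `env2` is a hypothetical fresh environment) is to wrap the whole interaction in `try/finally` so the editor is always released, even when the stepping loop raises."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "close-pattern-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative lifecycle pattern, not part of the recorded run.\n",
"env2 = UnityEnvironment()  # hypothetical second environment; blocks until Unity connects\n",
"try:\n",
"    env2.reset()\n",
"    env2.step()\n",
"finally:\n",
"    env2.close()  # always release the Unity process and socket"
]
}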
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}