{"id":2009,"date":"2020-10-21T06:14:46","date_gmt":"2020-10-21T06:14:46","guid":{"rendered":"https:\/\/kukan-joho.plus\/?p=2009"},"modified":"2024-11-25T05:01:58","modified_gmt":"2024-11-25T05:01:58","slug":"reinforcement-learning-1","status":"publish","type":"post","link":"https:\/\/club.informatix.co.jp\/?p=2009","title":{"rendered":"\u4e09\u76ee\u4e26\u3079\u3067\u5b66\u3076\u5f37\u5316\u5b66\u7fd2\uff5cQ\u5b66\u7fd2\u3068\u5b9f\u88c5\u4f8b\u3092\u89e3\u8aac\uff081\uff09"},"content":{"rendered":"<p>\u5f37\u5316\u5b66\u7fd2\u306f\u6a5f\u68b0\u5b66\u7fd2\u304c\u7814\u7a76\u3055\u308c\u59cb\u3081\u305f\u521d\u671f\u306e1950\u5e74\u4ee3\u304b\u3089\u5b58\u5728\u3057\u3001\u6b74\u53f2\u7684\u306b\u306f\u300c\u6559\u5e2b\u3042\u308a\u5b66\u7fd2\u300d\u306a\u3069\u4ed6\u306e\u6a5f\u68b0\u5b66\u7fd2\u624b\u6cd5\u306e\u6d3e\u751f\u5143\u306b\u4f4d\u7f6e\u4ed8\u3051\u3089\u308c\u308b\u5206\u91ce\u3067\u3059\u3002<\/p>\n<p>\u6700\u8fd1\u3067\u306f2016\u5e74\u306b\u5f37\u5316\u5b66\u7fd2\u3092\u7528\u3044\u305fGoogle DeepMind\u306e\u56f2\u7881AI\u300cAlpha Go\u300d\u304c\u30c8\u30c3\u30d7\u68cb\u58eb\u306b\u5b8c\u52dd\u3059\u308b\u3068\u3044\u3046\u6210\u679c\u3092\u6319\u3052\u3066\u8a71\u984c\u306b\u306a\u308a\u307e\u3057\u305f\u3002<\/p>\n<p>\u3053\u306e\u8a18\u4e8b\u3067\u306f\u5f37\u5316\u5b66\u7fd2\u306e\u7406\u8ad6\u3092\u7c21\u5358\u306b\u7d39\u4ecb\u3057\u3001\u305d\u306e\u57fa\u672c\u4f8b\u3092Tic-Tac-Toe\uff08\u25cb\u00d7\u30b2\u30fc\u30e0\u3001\u4e09\u76ee\u4e26\u3079\uff09\u3068\u3044\u3046\u30b2\u30fc\u30e0\u3092\u7528\u3044\u305f\u5b9f\u88c5\u3067\u78ba\u8a8d\u3057\u307e\u3059\u3002<\/p>\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_82_2 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">\u76ee\u6b21<\/p>\n<label for=\"ez-toc-cssicon-toggle-item-69e8a24c16f62\" class=\"ez-toc-cssicon-toggle-label\"><span class=\"\"><span class=\"eztoc-hide\" 
style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/label><input type=\"checkbox\"  id=\"ez-toc-cssicon-toggle-item-69e8a24c16f62\" checked aria-label=\"Toggle\" \/><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%BC%B7%E5%8C%96%E5%AD%A6%E7%BF%92%E3%81%A8%E3%81%AF\" >\u5f37\u5316\u5b66\u7fd2\u3068\u306f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%BC%B7%E5%8C%96%E5%AD%A6%E7%BF%92%E3%81%AE%E7%90%86%E8%AB%96\" >\u5f37\u5316\u5b66\u7fd2\u306e\u7406\u8ad6<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#Q%E5%AD%A6%E7%BF%92\" >Q\u5b66\u7fd2<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%AE%9F%E8%A3%85_Tic-Tac-Toe\" >\u5b9f\u88c5: Tic-Tac-Toe<\/a><ul class='ez-toc-list-level-3' ><li 
class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#Tic-Tac-Toe\" >Tic-Tac-Toe<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%AE%9F%E8%A3%85\" >\u5b9f\u88c5<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%AE%9F%E9%A8%93\" >\u5b9f\u9a13<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E3%82%BD%E3%83%BC%E3%82%B9%E3%82%B3%E3%83%BC%E3%83%89\" >\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E3%81%BE%E3%81%A8%E3%82%81\" >\u307e\u3068\u3081<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/club.informatix.co.jp\/?p=2009\/#%E5%8F%82%E8%80%83%E6%96%87%E7%8C%AE\" >\u53c2\u8003\u6587\u732e<\/a><\/li><\/ul><\/nav><\/div>\n\n<h2><span class=\"ez-toc-section\" id=\"%E5%BC%B7%E5%8C%96%E5%AD%A6%E7%BF%92%E3%81%A8%E3%81%AF\"><\/span>\u5f37\u5316\u5b66\u7fd2\u3068\u306f<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u5f37\u5316\u5b66\u7fd2\uff08Reinforcement Learning\uff09\u306f\u6a5f\u68b0\u5b66\u7fd2\uff08Machine Learning\uff09\u306e\u4e00\u3064\u3067\u3001\u30b7\u30b9\u30c6\u30e0\u304c\u81ea\u3089\u8a66\u884c\u932f\u8aa4\u3057\u306a\u304c\u3089\u6700\u9069\u306a\u5236\u5fa1\u3092\u5b9f\u73fe\u3059\u308b\u624b\u6cd5\u306e\u3053\u3068\u3092\u3044\u3044\u307e\u3059\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%BC%B7%E5%8C%96%E5%AD%A6%E7%BF%92%E3%81%AE%E7%90%86%E8%AB%96\"><\/span>\u5f37\u5316\u5b66\u7fd2\u306e\u7406\u8ad6<span 
class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u5f37\u5316\u5b66\u7fd2\u3067\u306f<strong>\u74b0\u5883<\/strong>\u3068<strong>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/strong>\u306e\u76f8\u4e92\u4f5c\u7528\u3092\u8003\u3048\u307e\u3059\u3002<\/p>\n<div id=\"attachment_2046\" style=\"width: 424px\" class=\"wp-caption aligncenter\"><img decoding=\"async\" aria-describedby=\"caption-attachment-2046\" class=\"wp-image-2046 size-full\" src=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020-p\/10\/\u5f37\u5316\u5b66\u7fd2.png\" alt=\"\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u74b0\u5883\u306e\u76f8\u4e92\u4f5c\u7528\" width=\"414\" height=\"181\" \/><p id=\"caption-attachment-2046\" class=\"wp-caption-text\">\u56f31: \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u74b0\u5883\u306e\u76f8\u4e92\u4f5c\u7528<\/p><\/div>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u74b0\u5883\u306e<strong>\u72b6\u614b<\/strong>\\(s\\)\u3092\u898b\u3066\u3001<strong>\u884c\u52d5<\/strong>\\(a\\)\u3092\u5b9f\u884c\u3057\u307e\u3059\u3002\u74b0\u5883\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\\(a\\)\u306b\u5fdc\u3058\u3066\u72b6\u614b\\(s\\)\u3092\u66f4\u65b0\u3057\u3001\u884c\u52d5\u306e\u7d50\u679c\u3068\u3057\u3066<strong>\u5831\u916c<\/strong>\\(r\\)\u3092\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u8fd4\u3057\u307e\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u30d6\u30e9\u30c3\u30af\u30b8\u30e3\u30c3\u30af\u306e\u30b2\u30fc\u30e0\u3067\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u30d7\u30ec\u30a4\u30e4\u30fc\u306e\u5834\u5408\u3092\u8003\u3048\u308b\u3068\u3001\u72b6\u614b\u306f\u300c\u8868\u5411\u304d\u3067\u8868\u793a\u3055\u308c\u3066\u3044\u308b\u30c8\u30e9\u30f3\u30d7\u306e\u6570\u5b57\u300d\u3001\u884c\u52d5\u306f\u300c\u30d2\u30c3\u30c8\u30fb\u30b9\u30bf\u30f3\u30c9\u306a\u3069\u306e\u30d7\u30ec\u30a4\u30e4\u30fc\u304c\u53d6\u308c\u308b\u9078\u629e\u80a2\u300d\u3001\u5831\u916c\u306f\u300c\u52dd\u6557\u306b\u5fdc\u3058\u305f\u914d\u5f53\u304b\u3089\u8ced\u3051\u91d1\
u3092\u5f15\u3044\u305f\u5024\u300d\u3068\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>\u5f37\u5316\u5b66\u7fd2\u306e\u76ee\u7684\u306f\u74b0\u5883\u3092\u653b\u7565\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3092\u80b2\u3066\u308b\u3053\u3068\u3001\u3064\u307e\u308a\u7dcf\u5831\u916c\u304c\u306a\u308b\u3079\u304f\u591a\u304f\u306a\u308b\u884c\u52d5\u306e\u9078\u3073\u65b9\u3092\u6a21\u7d22\u3059\u308b\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>$$R_t = r_{t+1} + \\gamma r_{t+2} + \\gamma^2 r_{t+3} + \\cdots = \\sum_{k=1}^{\\infty} \\gamma^{k-1} r_{t+k}.$$<\/p>\n<p>\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u305d\u306e\u77ac\u9593\u3060\u3051\u5831\u916c\u304c\u9ad8\u304f\u306a\u308b\u884c\u52d5\u3092\u9078\u3076\u306e\u3067\u306f\u306a\u304f\u3001\u9577\u3044\u76ee\u3067\u898b\u3066\u5f97\u3092\u3059\u308b\u884c\u52d5\u3092\u9078\u3076\u3053\u3068\u304c\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p>\u305d\u306e\u30d0\u30e9\u30f3\u30b9\u3092\u62c5\u3046\u306e\u304c<strong>\u5272\u5f15\u7387<\/strong>\u3068\u547c\u3070\u308c\u308b\u30d1\u30e9\u30e1\u30fc\u30bf\u30fc\\(\\gamma\\in(0, 1)\\)\u3067\u3059\u3002\\(\\gamma\\)\u3092\\(1\\)\u306b\u8fd1\u4ed8\u3051\u308b\u307b\u3069\u9577\u671f\u7684\u306a\u7dcf\u5831\u916c\u3068\u306a\u308a\u3001\u9006\u306b\\(0\\)\u306b\u8fd1\u4ed8\u3051\u308c\u3070\u77ac\u9593\u7684\u306a\u7dcf\u5831\u916c\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u72b6\u614b\u304c\\(s\\)\u306e\u3068\u304d\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u78ba\u7387\u7684\u306b\u884c\u52d5\\(a\\)\u3092\u9078\u629e\u3059\u308b\u3068\u3057\u307e\u3059\u3002\u305d\u306e\u78ba\u7387\u5206\u5e03\u3092\\(\\pi(s, 
a)\\)\u3068\u8868\u3057\u3066<strong>\u65b9\u7b56<\/strong>\u3068\u547c\u3073\u307e\u3059\u3002\u65b9\u7b56\u3092\u8abf\u6574\u3059\u308b\u3053\u3068\u3067\u3001\u3088\u308a\u5927\u304d\u306a\u7dcf\u5831\u916c\u3092\u5f97\u3089\u308c\u308b\u3088\u3046\u306b\u306a\u308c\u3070\u76ee\u7684\u304c\u9054\u6210\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u305d\u306e\u65b9\u6cd5\u3092\u8003\u3048\u308b\u305f\u3081\u306b\u3001\u72b6\u614b\u4fa1\u5024\u95a2\u6570\\(V^\\pi(s)\\)\u3068\u884c\u52d5\u4fa1\u5024\u95a2\u6570\\(Q^\\pi(s, a)\\)\u3068\u3044\u3046\u6307\u6a19\u3092\u5c0e\u5165\u3057\u307e\u3059\u3002<\/p>\n<p>\\begin{eqnarray}<br \/>\nV^\\pi(s) &amp;:=&amp; E_\\pi\\{R_t | s_t = s\\}, \\\\<br \/>\nQ^\\pi(s, a) &amp;:=&amp; E_\\pi\\{R_t | s_t = s, a_t = a\\}.<br \/>\n\\end{eqnarray}<\/p>\n<p>\u72b6\u614b\u4fa1\u5024\u95a2\u6570\\(V^\\pi(s)\\)\u306f\u72b6\u614b\u304c\\(s\\)\u3067\u3042\u308b\u3068\u304d\u306b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u53d7\u3051\u53d6\u308c\u308b\u7dcf\u5831\u916c\u306e\u671f\u5f85\u5024\u3067\u3001\u884c\u52d5\u4fa1\u5024\u95a2\u6570\\(Q^\\pi(s, a)\\)\u306f\u72b6\u614b\u304c\\(s\\)\u3067\u3042\u308b\u3068\u304d\u306b\u884c\u52d5\\(a\\)\u3092\u53d6\u3063\u305f\u5834\u5408\u306b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u53d7\u3051\u53d6\u308c\u308b\u7dcf\u5831\u916c\u306e\u671f\u5f85\u5024\u3067\u3059\u3002<\/p>\n<p>\u3053\u306e\u3068\u304d\\(R_t\\)\u306e\u5b9a\u7fa9\u3088\u308a\u3001\\(V(s)\\)\u3068\\(Q(s, a)\\)\u306e\u9593\u306b\u6b21\u306e\u95a2\u4fc2\u304c\u6210\u7acb\u3057\u307e\u3059\u3002<\/p>\n<p>\\begin{eqnarray}<br \/>\nV^\\pi(s) &amp;=&amp; \\sum_{a} \\pi(s, a)Q^\\pi(s, a), \\\\<br \/>\nQ^\\pi(s, a) &amp;=&amp; \\sum_{s&#8217;} P_{s, s&#8217;}^{a}\\left( R_{s, s&#8217;}^{a}+\\gamma V^\\pi(s&#8217;)\\right) .<br \/>\n\\end{eqnarray}<\/p>\n<p>\u3053\u308c\u3092<strong>Bellman\u65b9\u7a0b\u5f0f<\/strong>\u3068\u547c\u3073\u307e\u3059\u3002\u3053\u3053\u3067\u3001\\(P_{s, 
s&#8217;}^{a}\\)\u306f\u72b6\u614b\\(s\\)\u3067\u884c\u52d5\\(a\\)\u3092\u53d6\u3063\u305f\u3068\u304d\u306b\u72b6\u614b\u304c\\(s&#8217;\\)\u306b\u9077\u79fb\u3059\u308b\u78ba\u7387\u3001\\(R_{s, s&#8217;}^{a}\\)\u306f\u72b6\u614b\\(s\\)\u3067\u884c\u52d5\\(a\\)\u3092\u53d6\u3063\u305f\u3068\u304d\u306e\u5831\u916c\u306e\u671f\u5f85\u5024\u3067\u3059\u3002<\/p>\n<p>\u3053\u308c\u306b\u5bfe\u3057\u3066\u3001\u6700\u5927\u306e\u72b6\u614b\u4fa1\u5024\u95a2\u6570\u30fb\u884c\u52d5\u4fa1\u5024\u95a2\u6570\u3092\u9054\u6210\u3059\u308b\u65b9\u7b56\\(\\pi^*\\)\u304c\u5b58\u5728\u3059\u308b\u3053\u3068\u304c\u8a3c\u660e\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u3068\u304d\u3001\\(V^*:=V^{\\pi^*}, Q^*:=Q^{\\pi^*}\\)\u306b\u5bfe\u3057\u3066\u6b21\u306e\u95a2\u4fc2\u304c\u6210\u7acb\u3057\u307e\u3059\u3002\u3053\u3061\u3089\u306f<strong>Bellman\u6700\u9069\u65b9\u7a0b\u5f0f<\/strong>\u3068\u547c\u3070\u308c\u307e\u3059\u3002<\/p>\n<p>\\begin{eqnarray}<br \/>\nV^*(s) &amp;=&amp; \\max_{a} Q^*(s, a), \\\\<br \/>\nQ^*(s, a) &amp;=&amp; \\sum_{s&#8217;} P_{s, s&#8217;}^{a}\\left( R_{s, s&#8217;}^{a}+\\gamma V^*(s&#8217;)\\right) .<br \/>\n\\end{eqnarray}<\/p>\n<p>\u6700\u5584\u306e\u9078\u629e\u3092\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3092\u6c42\u3081\u308b\u3068\u3044\u3046\u5f37\u5316\u5b66\u7fd2\u306e\u76ee\u7684\u306f\u3001\u6570\u5b66\u7684\u306b\u8a00\u3044\u63db\u3048\u308b\u3068Bellman\u6700\u9069\u65b9\u7a0b\u5f0f\u3092\u89e3\u304f\u3053\u3068\u306b\u306a\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3057\u305f\u3002<\/p>\n<h3><span class=\"ez-toc-section\" id=\"Q%E5%AD%A6%E7%BF%92\"><\/span>Q\u5b66\u7fd2<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>Bellman\u6700\u9069\u65b9\u7a0b\u5f0f\u306e\u89e3\u6cd5\u306b\u306f\u52d5\u7684\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u30fb\u30e2\u30f3\u30c6\u30ab\u30eb\u30ed\u6cd5\u30fbTD\u5b66\u7fd2\u306a\u3069\u306e\u69d8\u3005\u306a\u624b\u6cd5\u304c\u3042\u308a\u307e\u3059\u304c\u3001\u4eca\u56de\u306f\u305d\u306e\u4e2d\u3067\u3082TD\u5b66\u7fd2\u306b\u5c5e\u3059\u308b\u300c<strong>Q\u5b66\u7fd2<\/strong>\u300d\u3092\u4f7f\u3044\u307e\u3059\u3002<\/p>\n<p>TD\u5b66\u7fd2(Temporal-Difference Learning)\u306f\u30e2\u30c7\u30eb\u306b\u95a2\u3059\u308b\u60c5\u5831(\\(P_{s, s&#8217;}^{a}\\)\u3068\\(R_{s, s&#8217;}^{a}\\))\u304c\u4e0d\u8981\u3067\u3001\u884c\u52d51\u56de\u5358\u4f4d\u3067\u4fa1\u5024\u95a2\u6570\u3092\u66f4\u65b0\u3067\u304d\u308b\u3068\u3044\u3046\u7279\u5fb4\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>Q\u5b66\u7fd2\u3067\u306f\u540d\u524d\u306e\u901a\u308a\u884c\u52d5\u4fa1\u5024\u95a2\u6570\\(Q\\)\u3092\u66f4\u65b0\u3057\u3066\u3044\u304d\u307e\u3059\u3002\u72b6\u614b\\(s_t\\)\u3067\u884c\u52d5\\(a_t\\)\u3092\u53d6\u3063\u305f\u3068\u304d\u306b\u5831\u916c\\(r_{t+1}\\)\u304c\u5f97\u3089\u308c\u3066\u72b6\u614b\u304c\\(s_{t+1}\\)\u306b\u9077\u79fb\u3057\u305f\u3068\u304d\u3001\\(Q\\)\u306e\u5024\u3092\u6b21\u306e\u3088\u3046\u306b\u66f4\u65b0\u3057\u307e\u3059\u3002<\/p>\n<p>$$<br \/>\nQ(s_t, a_t) \\leftarrow Q(s_t, a_t) + \\alpha \\left( r_{t+1} + \\gamma \\max_{a}Q(s_{t+1}, a) - Q(s_t, a_t)\\right).<br \/>\n$$<\/p>\n<p>\u3053\u3053\u3067\\(\\alpha\\)\u306f\u5b66\u7fd2\u7387\u3067\u3059\u3002\u3053\u306e\u66f4\u65b0\u5f0f\u306f\u5909\u5f62\u3059\u308b\u3068\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>$$<br \/>\nQ(s_t, a_t) \\leftarrow (1-\\alpha)Q(s_t, a_t) + \\alpha \\left( r_{t+1} + \\gamma \\max_{a}Q(s_{t+1}, a)\\right).<br \/>\n$$<\/p>\n<p>\u66f4\u65b0\u3092\u884c\u3046\u5ea6\u306b\\(Q(s_t, a_t)\\)\u306e\u5024\u304c\\(r_{t+1} + \\gamma \\max_{a}Q(s_{t+1}, 
a)\\)\u306b\\(\\alpha\\)\u306e\u5272\u5408\u3067\u8fd1\u3065\u304f\u3068\u3044\u3046\u5f0f\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\\(r_{t+1} + \\gamma \\max_{a}Q(s_{t+1}, a)\\)\u306fBellman\u6700\u9069\u65b9\u7a0b\u5f0f\u306e\u4e2d\u306b\u73fe\u308c\u308b\\(R_{s, s&#8217;}^{a}+\\gamma V^*(s&#8217;)\\)\u306b\u5bfe\u5fdc\u3057\u3066\u304a\u308a\u3001\u3053\u306e\u66f4\u65b0\u3092\u7e70\u308a\u8fd4\u3059\u3053\u3068\u3067\\(Q\\)\u306f\\(Q^*\\)\u306b\u8fd1\u4ed8\u3044\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n<p>\u76ee\u7684\u306e\u65b9\u7b56\u306f\\(Q\\)\u3092\u7528\u3044\u3066\u6b21\u306e\u3088\u3046\u306b\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p>\\[<br \/>\n\\pi(s, a) =<br \/>\n\\begin{cases}<br \/>\n1 &amp; (a = \\underset{a}{{\\rm argmax}}Q(s, a)), \\\\<br \/>\n0 &amp; ({\\rm otherwise}).<br \/>\n\\end{cases}<br \/>\n\\]<\/p>\n<p>\\(Q\\)\u306e\u66f4\u65b0\u304c\u5c40\u6240\u306b\u504f\u3089\u306a\u3044\u3088\u3046\u306b\u3001\u5b66\u7fd2\u6642\u306b\u53d6\u308b\u884c\u52d5\\(a_t\\)\u306f\u03b5-greedy\u6cd5\u306a\u3069\u3092\u4f7f\u3063\u3066\u591a\u5c11\u306e\u30e9\u30f3\u30c0\u30e0\u6027\u3092\u52a0\u3048\u3066\u9078\u3073\u307e\u3059\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%AE%9F%E8%A3%85_Tic-Tac-Toe\"><\/span>\u5b9f\u88c5: Tic-Tac-Toe<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u5b9f\u88c5\u3067\u306f\\(Q\\)\u3092\u914d\u5217\u3067\u8868\u73fe\u3057\u3066\u3001\u5168\u3066\u306e\\((s, 
a)\\)\u306b\u5bfe\u5fdc\u3059\u308b\\(Q\\)\u306e\u5024\u3092\u4fdd\u6301\u3057\u307e\u3059\u3002\u305d\u306e\u305f\u3081\u3001\u53ef\u80fd\u306a\u72b6\u614b\u3084\u884c\u52d5\u306e\u6570\u304c\u81a8\u5927\u306a\u554f\u984c\u3067\u306f\u3001\u5fc5\u8981\u306a\u30e1\u30e2\u30ea\u30fc\u306e\u5bb9\u91cf\u3084\u53ce\u675f\u307e\u3067\u306e\u8a08\u7b97\u91cf\u304c\u7206\u767a\u3057\u3066\u3057\u307e\u3044\u5411\u3044\u3066\u3044\u307e\u305b\u3093\u3002<\/p>\n<p>\u4eca\u56de\u6271\u3046Tic-Tac-Toe(\u25cb\u00d7\u30b2\u30fc\u30e0\u3001\u4e09\u76ee\u4e26\u3079)\u3068\u3044\u3046\u30b2\u30fc\u30e0\u306f\u3001\u72b6\u614b\u306e\u6570\u304c\u9ad8\u3005\\(3^9=19683\\)\u901a\u308a\u3001\u884c\u52d5\u306e\u6570\u304c\\(9\\)\u901a\u308a\u306a\u306e\u3067Q\u5b66\u7fd2\u3067\u5341\u5206\u5b66\u7fd2\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<h3><span class=\"ez-toc-section\" id=\"Tic-Tac-Toe\"><\/span>Tic-Tac-Toe<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>Tic-Tac-Toe\u306e\u30eb\u30fc\u30eb\u3092\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u5b9a\u3081\u307e\u3059\u3002<\/p>\n<ul>\n<li>\u76e4\u9762\u306f\u7e26\u6a2a3\u30de\u30b9\u305a\u3064\u306e9\u30de\u30b9<\/li>\n<li>\u6700\u521d\u306f\u5168\u3066\u306e\u30de\u30b9\u304c\u7a7a\u306e\u72b6\u614b<\/li>\n<li>\u5148\u624b\u306f\u00d7\u3092\u3001\u5f8c\u624b\u306f\u25cb\u3092\u4ea4\u4e92\u306b\u7a7a\u306e\u30de\u30b9\u306b\u66f8\u304d\u8fbc\u3093\u3067\u3044\u304f<\/li>\n<li>\u30d1\u30b9\u306f\u4e0d\u53ef<\/li>\n<li>\u81ea\u5206\u306e\u30de\u30fc\u30af\u3092\u7e26\u6a2a\u659c\u3081\u306e\u3044\u305a\u308c\u304b\u4e00\u5217\u30673\u3064\u4e26\u3079\u305f\u65b9\u304c\u52dd\u3061<\/li>\n<li>\u52dd\u6557\u304c\u6c7a\u305b\u305a\u306b9\u30de\u30b9\u5168\u3066\u304c\u57cb\u307e\u3063\u305f\u3089\u5f15\u304d\u5206\u3051<\/li>\n<\/ul>\n<h3><span class=\"ez-toc-section\" id=\"%E5%AE%9F%E8%A3%85\"><\/span>\u5b9f\u88c5<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u72b6\u614b\\(s\\)\u306f\u76e4\u9762\u306e\u72b6\u614b\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u3066\u3001\u884c\u52d5\\(a\\)\u306f\u6b21\u306b\u66f8\u304d\u8fbc\u3080\u4f4d\u7f6e\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u5831\u916c\\(r\\)\u306f\u52dd\u6557\u306e\u7d50\u679c\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u3066\u3001\u4eca\u56de\u306f\u52dd\u3063\u305f\u3068\u304d\u306b\\(1\\)\u3001\u8ca0\u3051\u305f\u3068\u304d\u306b\\(-1\\)\u3001\u52dd\u6557\u304c\u3064\u3044\u3066\u3044\u306a\u3044\u3068\u304d\u306f\u5e38\u306b\\(0\\)\u3068\u306a\u308b\u3088\u3046\u306b\u8a2d\u5b9a\u3057\u307e\u3059\u3002<\/p>\n<h4>\u72b6\u614b\u9077\u79fb<\/h4>\n<p>\u3053\u3053\u3067\u6ce8\u610f\u3057\u306a\u3051\u308c\u3070\u306a\u3089\u306a\u3044\u306e\u306f\u3001Tic-Tac-Toe\u306f\u5148\u624b\u3068\u5f8c\u624b\u304c\u4ea4\u4e92\u306b\u30d7\u30ec\u30a4\u3059\u308b\u30b2\u30fc\u30e0\u3067\u3042\u308b\u3068\u3044\u3046\u3053\u3068\u3067\u3059\u3002<\/p>\n<p>\u3057\u305f\u304c\u3063\u3066\u56f32\u306e\u3088\u3046\u306b\u3001\u5148\u624b\u304b\u3089\u898b\u3066\u72b6\u614b\\(s_t\\)\u306e\u6b21\u306e\u72b6\u614b\\(s_{t+1}\\)\u306f\\(s_t\\)\u306b\\(a_t\\)\u3092\u53cd\u6620\u3055\u305b\u305f\u76f4\u5f8c\u306e\u72b6\u614b\\(s&#8217;_t\\)\u3067\u306f\u306a\u304f\u3001\u6b21\u306e\u76f8\u624b\u306e\u884c\u52d5\\(a&#8217;_t\\)\u306e\u5f8c\u306e\u72b6\u614b\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n<div id=\"attachment_2100\" style=\"width: 248px\" class=\"wp-caption aligncenter\"><img decoding=\"async\" aria-describedby=\"caption-attachment-2100\" class=\"wp-image-2100 size-full\" src=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020-p\/10\/tictactoetree.png\" alt=\"Tic-Tac-Toe\u306e\u72b6\u614b\u9077\u79fb\" width=\"238\" height=\"276\" \/><p id=\"caption-attachment-2100\" class=\"wp-caption-text\">\u56f32: 
Tic-Tac-Toe\u306e\u72b6\u614b\u9077\u79fb<\/p><\/div>\n<p>Q\u5b66\u7fd2\u3067\u306f\u540c\u3058\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u540c\u58eb\u306e\u30b2\u30fc\u30e0\u3067\\((s_t, a_t, r_{t+1}), (s&#8217;_t, a&#8217;_t, r&#8217;_{t+1})\\)\u3092\u53d6\u5f97\u3057\u3001\\((s_t, a_t, r_{t+1}-r&#8217;_{t+1}, s_{t+1})\\)\u3042\u308b\u3044\u306f\\((s&#8217;_t, a&#8217;_t, r&#8217;_{t+1}-r_{t+2}, s&#8217;_{t+1})\\)\u3067\u884c\u52d5\u4fa1\u5024\u95a2\u6570\\(Q\\)\u3092\u66f4\u65b0\u3059\u308b\u3068\u3044\u3046\u64cd\u4f5c\u3092\u7e70\u308a\u8fd4\u3057\u307e\u3059\u3002<\/p>\n<h4>afterstate<\/h4>\n<p>\u56f32\u306e\u3088\u3046\u306b\u3001Tic-Tac-Toe\u3067\u306f\u72b6\u614b\\(s_t\\)\u306b\u884c\u52d5\\(a_t\\)\u3092\u4f5c\u7528\u3055\u305b\u305f\u7d50\u679c\u306f\u78ba\u7387\u306b\u3088\u3089\u305a\u5e38\u306b\\(s&#8217;_t\\)\u3068\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u3053\u306e\u3088\u3046\u306b\u884c\u52d5\u306b\u5bfe\u3059\u308b\u7d50\u679c\u304c\u4e88\u3081\u308f\u304b\u3063\u3066\u3044\u308b\u5834\u5408\u305d\u306e\u7d50\u679c\u3092<strong>afterstate<\/strong>\u3068\u547c\u3073\u3001\u56f32\u306e\u4f8b\u306e\u5834\u5408\u3067\u306f\\(s&#8217;_t = afterstate(s_t, a_t)\\)\u3068\u8868\u3057\u307e\u3059\u3002<\/p>\n<p>\u3053\u306e\u3068\u304d\u884c\u52d5\u4fa1\u5024\u95a2\u6570\\(Q\\)\u306f\u3001\u72b6\u614b\u3068\u884c\u52d5\u306e\u5bfe\\((s, a)\\)\u3092\\(afterstate(s, a)\\)\u306b\u96c6\u7d04\u3059\u308b\u3053\u3068\u3067\u3001\\(Q(s, a) = Q(afterstate(s, 
a))\\)\u30681\u5909\u6570\u95a2\u6570\u3068\u3057\u3066\u8868\u73fe\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u3053\u3046\u3059\u308b\u3053\u3068\u3067\\(Q\\)\u304c\u5360\u6709\u3059\u308b\u30e1\u30e2\u30ea\u9818\u57df\u3092\u72ed\u3081\u3089\u308c\u308b\u307b\u304b\u3001\u56f33\u306e\u3088\u3046\u306b\u540c\u3058afterstate\u306b\u306a\u308b\u8907\u6570\u306e\u884c\u52d5\u306b\u5bfe\u3059\u308b\u884c\u52d5\u4fa1\u5024\u3092\u3072\u3068\u307e\u3068\u3081\u306b\u7ba1\u7406\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u5b66\u7fd2\u52b9\u7387\u3082\u4e0a\u304c\u308a\u307e\u3059\u3002<\/p>\n<div id=\"attachment_2036\" style=\"width: 310px\" class=\"wp-caption aligncenter\"><img decoding=\"async\" aria-describedby=\"caption-attachment-2036\" class=\"wp-image-2036 size-medium\" src=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020-p\/10\/afterstate-300x247.png\" alt=\"Tic-Tac-Toe\u306b\u304a\u3051\u308bafterstate\u306e\u4f8b\" width=\"300\" height=\"247\" \/><p id=\"caption-attachment-2036\" class=\"wp-caption-text\">\u56f33: Tic-Tac-Toe\u306b\u304a\u3051\u308bafterstate\u306e\u4f8b<\/p><\/div>\n<h3><span class=\"ez-toc-section\" id=\"%E5%AE%9F%E9%A8%93\"><\/span>\u5b9f\u9a13<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>Q\u5b66\u7fd2\u3067\u5b9f\u88c5\u3057\u305f\u540c\u3058\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3069\u3046\u3057\u306e5\u4e07\u56de\u306e\u30b2\u30fc\u30e0\u3067\u5b66\u7fd2\u3057\u307e\u3057\u305f\u3002\u30d1\u30e9\u30e1\u30fc\u30bf\u30fc\u306f\u305d\u308c\u305e\u308c\u5272\u5f15\u7387\u3092\\(\\gamma=0.9\\)\u3001\u5b66\u7fd2\u7387\u3092\\(\\alpha=0.1\\)\u3001\u03b5-greedy\u6cd5\u306e\u5b9a\u6570\u3092\\(\\varepsilon=0.5\\)\u3068\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>\u5b66\u7fd2\u56de\u6570\u306b\u5bfe\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5909\u5316\u3092\u4ee5\u4e0b\u306e\u30b0\u30e9\u30d5\u3067\u793a\u3057\u307e\u3059\u3002\u9752\u304c\u52dd\u3061\u3001\u6a59\u304c\u5f15\u304d\u5206\u3051\u3001\u7dd1\u304c\u8ca0\u3051\u306e\u5272\u5408\u3092\u610f\u5473\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u4e0a\u6bb5\u306f\u30e9\u30f3\u30c0\u30e0\u306b\u624b\u3092\u9078\u3076\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u30681000\u56de\u5bfe\u6226\u3057\u305f\u7d50\u679c\u3067\u3001\u4e0b\u6bb5\u306f\u5b66\u7fd2\u3057\u305f\u30b2\u30fc\u30e0\u56de\u6570\u304c500\u56de\u524d\u306e\u904e\u53bb\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u30681\u56de\u5bfe\u6226\u3057\u305f\u7d50\u679c\u3067\u3059\u3002\u5de6\u306f\u81ea\u5206\u304c\u5148\u624b\u3067\u3001\u53f3\u306f\u5bfe\u6226\u76f8\u624b\u304c\u5148\u624b\u306e\u3068\u304d\u306e\u7d50\u679c\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<div id=\"attachment_11303\" style=\"width: 835px\" class=\"wp-caption aligncenter\"><img decoding=\"async\" aria-describedby=\"caption-attachment-11303\" class=\"wp-image-11303 size-full\" src=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020\/10\/result100k.png\" alt=\"\u5b9f\u9a13\u7d50\u679c\" width=\"825\" height=\"524\" srcset=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020\/10\/result100k.png 825w, https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020\/10\/result100k-300x191.png 300w, 
https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020\/10\/result100k-768x488.png 768w\" sizes=\"(max-width: 825px) 100vw, 825px\" \/><p id=\"caption-attachment-11303\" class=\"wp-caption-text\">\u56f34: \u5b9f\u9a13\u7d50\u679c<\/p><\/div>\n<p>\u30e9\u30f3\u30c0\u30e0\u306a\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u306e\u5bfe\u6226\u7d50\u679c\u3092\u898b\u308b\u3068\u5b66\u7fd2\u306e\u521d\u671f\u3067\u65e2\u306b\u8ca0\u3051\u306f\u306a\u304f\u306a\u308a\u3001\u5b89\u5b9a\u3057\u305f\u52dd\u7387\u304c\u5f97\u3089\u308c\u3066\u3044\u308b\u3053\u3068\u304c\u308f\u304b\u308a\u307e\u3059\u3002\u307e\u305f\u3001\u904e\u53bb\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u306e\u5bfe\u6226\u3092\u898b\u3066\u3082\u6700\u521d\u3092\u9664\u3044\u3066\u5168\u3066\u5f15\u304d\u5206\u3051\u3068\u306a\u3063\u3066\u304a\u308a\u5b89\u5b9a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p>\u904e\u53bb\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u306e\u5bfe\u6226\u3067\u52dd\u3061\u304c\u73fe\u308c\u306a\u3044\u306e\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5b66\u7fd2\u304c\u540c\u3058\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u540c\u58eb\u306e\u5bfe\u6226\u3067\u884c\u308f\u308c\u308b\u3053\u3068\u304b\u3089\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u9078\u3073\u3084\u3059\u3044\u624b\u306e\u5b66\u7fd2\u304c\u30e9\u30f3\u30c0\u30e0\u306b\u9078\u3070\u308c\u308b\u624b\u3088\u308a\u3082\u65e9\u304f\u9032\u3093\u3067\u3044\u308b\u3053\u3068\u304c\u8981\u56e0\u3068\u3057\u3066\u8003\u3048\u3089\u308c\u307e\u3059\u3002<\/p>\n<h3><span class=\"ez-toc-section\" id=\"%E3%82%BD%E3%83%BC%E3%82%B9%E3%82%B3%E3%83%BC%E3%83%89\"><\/span>\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>Python\u3067\u5b9f\u88c5\u3057\u305f\u30bd\u30fc\u30b9\u30b3\u30fc\u30c9\u3092\u4ee5\u4e0b\u306e\u5834\u6240\u3067\u516c\u958b\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p><a 
href=\"https:\/\/github.com\/informatix-inc\/q-tic-tac-toe\">informatix-inc\/q-tic-tac-toe: \u5f37\u5316\u5b66\u7fd2(Q\u5b66\u7fd2)\u3067Tic-Tac-Toe\u3092\u653b\u7565\u3059\u308b\u5b9f\u88c5\u4f8b<\/a><\/p>\n<p>save\/tic_tac_toe_q\u306f100\u4e07\u30b2\u30fc\u30e0\u5b66\u7fd2\u3057\u305f\u7d50\u679c\u3067\u3059\u3002tic_tac_toe_demo.py\u3092\u5b9f\u884c\u3059\u308c\u3070\u5b66\u7fd2\u3055\u308c\u305f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u30b3\u30f3\u30bd\u30fc\u30eb\u4e0a\u3067\u5bfe\u6226\u3059\u308b\u3053\u3068\u3082\u3067\u304d\u307e\u3059\u3002<\/p>\n<div id=\"attachment_2105\" style=\"width: 730px\" class=\"wp-caption aligncenter\"><img decoding=\"async\" aria-describedby=\"caption-attachment-2105\" class=\"wp-image-2105 size-full\" src=\"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020-p\/10\/demo.png\" alt=\"\u5b66\u7fd2\u3055\u308c\u305f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u5bfe\u6226\" width=\"720\" height=\"425\" \/><p id=\"caption-attachment-2105\" class=\"wp-caption-text\">\u56f35: \u5b66\u7fd2\u3055\u308c\u305f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u5bfe\u6226<\/p><\/div>\n<h2><span class=\"ez-toc-section\" id=\"%E3%81%BE%E3%81%A8%E3%82%81\"><\/span>\u307e\u3068\u3081<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u7b2c1\u56de\u306f\u5f37\u5316\u5b66\u7fd2\u306e\u57fa\u790e\u3068\u3001Tic-Tac-Toe\u3092\u7528\u3044\u305fQ\u5b66\u7fd2\u306e\u5b9f\u88c5\u4f8b\u3092\u7d39\u4ecb\u3057\u307e\u3057\u305f\u3002<\/p>\n<p>Tic-Tac-Toe\u306f\u975e\u5e38\u306b\u5358\u7d14\u306a\u30b2\u30fc\u30e0\u3067\u3001\u5f37\u5316\u5b66\u7fd2\u3092\u4f7f\u308f\u306a\u304f\u3066\u3082\u5c11\u3057\u8003\u3048\u308c\u3070\u6700\u9069\u306a\u6226\u7565\u3092\u898b\u3064\u3051\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<p><a href=\"https:\/\/club.informatix.co.jp\/?p=11282\" target=\"_blank\" 
rel=\"noopener\">\u6b21\u56de<\/a>\u306f\u6226\u7565\u3092\u898b\u3064\u3051\u308b\u3053\u3068\u304c\u5c11\u3057\u96e3\u3057\u3044\u30b2\u30fc\u30e0\u306b\u5f37\u5316\u5b66\u7fd2\u3092\u9069\u7528\u3057\u3066\u305d\u306e\u52d5\u304d\u3092\u78ba\u304b\u3081\u307e\u3059\u3002<\/p>\n<div class=\"st-header-flexwrap\" style=\"height: auto; width: 100%; box-sizing: border-box; background-color: #efefef; border-radius: 5px; border: solid #ccc 1px; padding: 20px; margin-bottom: 0px;\">\n<div class=\"st-header-flexbox\">\n<p class=\"st-header-flextitle\" style=\"font-size: 110%; color: #000;\">\u7269\u4f53\u691c\u51fa\u306a\u3069AI\u6a5f\u68b0\u5b66\u7fd2\u3092\u6d3b\u7528\u3057\u305f\u30b7\u30b9\u30c6\u30e0\u306e\u69cb\u7bc9\u306b\u95a2\u3059\u308b\u3054\u76f8\u8ac7\uff08\u7121\u6599\uff09\u3092\u627f\u3063\u3066\u3044\u307e\u3059<\/p>\n<p><span class=\"st-mybtn st-mybtn-mini\"><a style=\"background: #01579b; border-radius: 5px; font-weight: normal; color: #fff; box-shadow: 0 3px 0 #01579b;\" href=\"https:\/\/www.informatix.co.jp\/contact-us\/\" target=\"_blank\" rel=\"noopener\">\u304a\u6c17\u8efd\u306b\u304a\u554f\u3044\u5408\u308f\u305b\u304f\u3060\u3055\u3044<i class=\"fa fa-after fa-angle-right st-css-no\" aria-hidden=\"true\"><\/i><\/a><\/span><\/p>\n<\/div>\n<\/div>\n<p>&nbsp;<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%8F%82%E8%80%83%E6%96%87%E7%8C%AE\"><\/span>\u53c2\u8003\u6587\u732e<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>[1] Richard S. Sutton and Andrew G. Barto (2018). Reinforcement Learning: An Introduction. 
The MIT Press A Bradford Book.<br \/>\n<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5f37\u5316\u5b66\u7fd2\u306f\u6a5f\u68b0\u5b66\u7fd2\u304c\u7814\u7a76\u3055\u308c\u59cb\u3081\u305f\u521d\u671f\u306e1950\u5e74\u4ee3\u304b\u3089\u5b58\u5728\u3057\u3001\u6b74\u53f2\u7684\u306b\u306f\u300c\u6559\u5e2b\u3042\u308a\u5b66\u7fd2\u300d\u306a\u3069\u4ed6\u306e\u6a5f\u68b0\u5b66\u7fd2\u624b\u6cd5\u306e\u6d3e\u751f\u5143\u306b\u4f4d\u7f6e\u4ed8\u3051\u3089\u308c\u308b\u5206\u91ce\u3067\u3059\u3002 \u6700\u8fd1\u3067\u306f2016\u5e74\u306b\u5f37\u5316\u5b66\u7fd2\u3092\u7528\u3044\u305fGoogle De &#8230; <\/p>\n","protected":false},"author":13,"featured_media":11224,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[589,4],"tags":[],"class_list":["post-2009","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ai","category-4"],"jetpack_featured_media_url":"https:\/\/club.informatix.co.jp\/wp-content\/uploads\/2020\/11\/20201021-1.png","_links":{"self":[{"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/posts\/2009","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/users\/13"}],"replies":[{"embeddable":true,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2009"}],"version-history":[{"count":24,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/posts\/2009\/revisions"}],"predecessor-version":[{"id":20904,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=\/wp\/v2\/posts\/2009\/revisions\/20904"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/club.informatix.co.jp\/index.p
hp?rest_route=\/wp\/v2\/media\/11224"}],"wp:attachment":[{"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2009"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2009"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/club.informatix.co.jp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2009"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}