package generators.misc;

import algoanim.animalscript.AnimalScript;
import algoanim.primitives.Rect;
import algoanim.primitives.SourceCode;
import algoanim.primitives.Text;
import algoanim.primitives.Variables;
import algoanim.primitives.generators.Language;
import algoanim.properties.AnimationPropertiesKeys;
import algoanim.properties.RectProperties;
import algoanim.properties.SourceCodeProperties;
import algoanim.properties.TextProperties;
import algoanim.util.Coordinates;
import algoanim.util.Offset;
import generators.framework.Generator;
import generators.framework.GeneratorType;
import generators.framework.properties.AnimationPropertiesContainer;
import generators.tree.KDTree;
import java.awt.Color;
import java.awt.Font;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Random;
import org.apache.commons.jxpath.ri.model.beans.BeanPointerFactory;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;

/* loaded from: input_file:generators/misc/QLearning.class */
public class QLearning implements Generator {
    // Script builder the whole animation is written to; created in init().
    private Language lang;
    // Variable table exposed to the animation ("alpha", "gamma", "iteration").
    private Variables vars;
    // Q-table cells, indexed [column][row]: 5 columns x 13 rows (see initializeQTable()).
    private Rect[][] qFunctionTable;
    // Text shown inside each Q-table cell, same indexing as qFunctionTable.
    private Text[][] qFunctionText;
    // Grid squares of the example world, indexed [x][y] with 4 columns and 3 rows.
    private Rect[][] rectField;
    // Symbol/arrow text drawn inside each grid square, same indexing as rectField.
    private Text[][] textField;
    // Axis labels of the grid: entries 0-3 label the columns, 4-6 label the rows.
    private Text[] textFieldLabel;
    // Eight annotation text lines created in initializeAnnotation().
    private Text[] annotation;
    // Small "a'" label created in introduction1() next to the pseudo-code.
    private Text aText;
    // "Q-Function" caption above the Q-table.
    private Text qFuncLabel;
    // Pseudo-code listing currently used for highlighting.
    private SourceCode code;
    // Learning rate, read from the "alpha" primitive in generate().
    private double alpha;
    // Discount factor, read from the "gamma" primitive in generate().
    private double gamma;
    // {x, y} of the finish square, read from the "finish" primitive.
    private int[] coordinatesFinish;
    // One {x, y} pair per trap square, read from the "traps" primitive.
    private int[][] coordinatesTraps;
    // Reward values read from the "rewardMove", "rewardFinish" and "rewardTrap" primitives.
    private double rewardMove;
    private double rewardFinish;
    private double rewardTrap;
    // User-configurable visual properties fetched by name in generate().
    private SourceCodeProperties sourceCodeProps;
    private TextProperties rectTextProps;
    private RectProperties rectsProps;
    private SourceCodeProperties textsProps;
    // Reward for entering each square, indexed [x][y]; filled in generate().
    double[][] rewards;
    // Number of updates that are animated step by step (compared against learningSteps).
    private int detailedIterations;
    // Upper bound (inclusive) used by the learning loops for learningSteps.
    private int numberOfUpdates;
    // Count of Q-function updates performed so far; reset in init().
    private int learningSteps;
    // 1-based counter of learning cycles; reset in init().
    private int iteration;
    // Symbols for the finish and trap squares. NOTE(review): the visible methods use the
    // literals directly rather than these constants.
    private final String FINISH_SYMBOL = "☆";
    private final String TRAP_SYMBOL = "☠";
    // Fill colour of the state column in the Q-table (a light grey: saturation 0, brightness 0.9).
    private Color tableStateColor = Color.getHSBColor(0.9f, 0.0f, 0.9f);
    // Fill colour of the Q-table cell being updated (RGB 0xFFAA53).
    private Color qUpdateColor = new Color(16755283);
    // Fill colour marking the current state (RGB 0xF23E2B).
    private Color currentStateColor = new Color(15875627);
    // Action indices; they match the Q-table column order left, right, up, down.
    private final int LEFT = 0;
    private final int RIGHT = 1;
    private final int UP = 2;
    private final int DOWN = 3;
    // Q-values; presumably indexed [x][y][action] via getQ/setQ, which are not shown here.
    double[][][] qFunction = new double[4][3][4];

    /**
     * Resets the learning counters and creates a fresh AnimalScript language
     * object for the next call to {@code generate}.
     */
    @Override // generators.framework.Generator
    public void init() {
        // Counters first: no updates done yet, and cycles are numbered from 1.
        this.iteration = 1;
        this.learningSteps = 0;
        this.lang = new AnimalScript(
                "QLearning",
                "Dmitrij Kress",
                BeanPointerFactory.BEAN_POINTER_FACTORY_ORDER,
                600);
    }

    /**
     * Builds the complete Q-Learning animation and returns it as script text.
     *
     * <p>Reads the visual properties "sourceCode", "rectText", "rects" and the text
     * properties from {@code animationPropertiesContainer}, and the primitives
     * "traps", "alpha", "gamma", "rewardTrap", "rewardFinish", "rewardMove",
     * "finish", "numberOfUpdates" and "detailedIterations" from {@code hashtable}.
     * It then fills the reward grid, zero-initialises the Q-value of every
     * available action, and runs the introduction, demonstration and conclusion
     * phases in order.
     *
     * @param animationPropertiesContainer user-chosen visual properties
     * @param hashtable user-chosen primitive values, keyed by name
     * @return the generated animation as produced by {@code lang.toString()}
     */
    @Override // generators.framework.Generator
    public String generate(AnimationPropertiesContainer animationPropertiesContainer, Hashtable<String, Object> hashtable) {
        this.lang.setStepMode(true);

        // Visual properties.
        this.sourceCodeProps = (SourceCodeProperties) animationPropertiesContainer.getPropertiesByName("sourceCode");
        this.rectTextProps = (TextProperties) animationPropertiesContainer.getPropertiesByName("rectText");
        this.rectsProps = (RectProperties) animationPropertiesContainer.getPropertiesByName("rects");
        this.textsProps = (SourceCodeProperties) animationPropertiesContainer.getPropertiesByName(AnimationPropertiesKeys.TEXT_PROPERTY);

        // Primitive parameters.
        this.coordinatesTraps = (int[][]) hashtable.get("traps");
        this.alpha = Double.parseDouble(hashtable.get("alpha").toString());
        this.gamma = Double.parseDouble(hashtable.get("gamma").toString());
        this.rewardTrap = ((Integer) hashtable.get("rewardTrap")).intValue();
        this.rewardFinish = ((Integer) hashtable.get("rewardFinish")).intValue();
        this.rewardMove = ((Integer) hashtable.get("rewardMove")).intValue();
        this.coordinatesFinish = (int[]) hashtable.get("finish");
        this.numberOfUpdates = ((Integer) hashtable.get("numberOfUpdates")).intValue();
        this.detailedIterations = ((Integer) hashtable.get("detailedIterations")).intValue();

        // Reward grid and initial Q-values for the 4x3 world.
        this.rewards = new double[4][3];
        for (int x = 0; x < 4; x++) {
            for (int y = 0; y < 3; y++) {
                double reward;
                if (this.coordinatesFinish[0] == x && this.coordinatesFinish[1] == y) {
                    reward = this.rewardFinish;
                } else {
                    // Every ordinary square costs a move unless it is listed as a trap.
                    reward = this.rewardMove;
                    for (int[] trap : this.coordinatesTraps) {
                        if (trap[0] == x && trap[1] == y) {
                            reward = this.rewardTrap;
                        }
                    }
                }
                this.rewards[x][y] = reward;

                // "Initialize all Q(s, a) with 0" for every action available in this square.
                for (Integer action : actionsAvailable(x, y)) {
                    setQ(x, y, action.intValue(), 0.0);
                }
            }
        }

        // Variables shown in the animation's variable window.
        this.vars = this.lang.newVariables();
        this.vars.declare("double", "alpha");
        this.vars.set("alpha", String.valueOf(round(this.alpha)));
        this.vars.declare("double", "gamma");
        this.vars.set("gamma", String.valueOf(round(this.gamma)));
        this.vars.declare("int", "iteration");
        this.vars.setGlobal("iteration");
        this.vars.set("iteration", "1");

        // Animation phases, in display order.
        introduction1();
        initializeField();
        introduction2();
        initializeQTable();
        initializeAnnotation();
        learn();
        conclusion();
        return this.lang.toString();
    }

    /**
     * @return the generator name, {@code "QLearning"}
     */
    @Override // generators.framework.Generator
    public String getName() {
        return "QLearning";
    }

    /**
     * @return the name of the animated algorithm, {@code "Q-Learning"}
     */
    @Override // generators.framework.Generator
    public String getAlgorithmName() {
        return "Q-Learning";
    }

    /**
     * @return the author of this animation
     */
    @Override // generators.framework.Generator
    public String getAnimationAuthor() {
        return "Dmitrij Kress";
    }

    /**
     * @return a five-line English description of Q-Learning, lines separated by
     *         {@code '\n'} with no trailing newline
     */
    @Override // generators.framework.Generator
    public String getDescription() {
        // Compile-time concatenation: one source line per line of the description.
        return "Q-Learning is a machine learning algorithm from the reinforcement learning\n"
                + "area. The goal of the Q-Learning algorithm is to find a function Q which gives the\n"
                + "utility of a pair (s, a) where s is a state and a is an action. To achieve that the\n"
                + "algorithm performs an action and receives the immediate reward. The rewards\n"
                + "received in different states are used to calculate the Q-Function.";
    }

    /**
     * @return the Q-Learning pseudo-code, one statement per line, each line
     *         (including the last) terminated by {@code '\n'}
     */
    @Override // generators.framework.Generator
    public String getCodeExample() {
        // Compile-time concatenation: one source line per line of pseudo-code.
        return "Initialize all Q(s, a)\n"
                + "Loop\n"
                + "   Select a state s\n"
                + "   While (s != target_state)\n"
                + "      Select an action a and execute it\n"
                + "      Receive the immediate reward and observe the new state s'\n"
                + "      Update the Q-Function:\n"
                + "         Q(s, a) := Q(s, a) + α*((r(s, a) + γmax Q(s', a')) - Q(s, a))\n"
                + "      s := s'\n";
    }

    /**
     * @return the file extension constant for AnimalScript output
     */
    @Override // generators.framework.Generator
    public String getFileExtension() {
        return Generator.ANIMALSCRIPT_FORMAT_EXTENSION;
    }

    /**
     * @return {@link Locale#ENGLISH}; all texts of this animation are English
     */
    @Override // generators.framework.Generator
    public Locale getContentLocale() {
        return Locale.ENGLISH;
    }

    /**
     * @return a new {@code GeneratorType} built from {@code GENERATOR_TYPE_MORE}
     */
    @Override // generators.framework.Generator
    public GeneratorType getGeneratorType() {
        return new GeneratorType(GeneratorType.GENERATOR_TYPE_MORE);
    }

    /**
     * @return the language of the displayed code, {@code "Pseudo-Code"}
     */
    @Override // generators.framework.Generator
    public String getOutputLanguage() {
        return "Pseudo-Code";
    }

    /**
     * Shows the first two introduction slides: the title banner with a short
     * description of Q-Learning, followed by the outline of the algorithm.
     *
     * <p>Side effects: stores the outline listing in {@code code} and the small
     * "a'" label in {@code aText}. The outline is hidden again before returning;
     * {@code aText} stays visible.
     */
    public void introduction1() {
        // Title text with a filled box drawn behind it (depth 2 lies behind depth 1).
        TextProperties headerProps = new TextProperties();
        headerProps.set("font", new Font("SansSerif", Font.BOLD, 20));
        headerProps.set("color", Color.BLACK);
        headerProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 1);
        Text header = this.lang.newText(new Coordinates(KDTree.GM_Y0, 20), "Q-Learning", "header", null, headerProps);

        RectProperties headerBoxProps = new RectProperties();
        headerBoxProps.set("color", Color.GRAY);
        headerBoxProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 2);
        headerBoxProps.set(AnimationPropertiesKeys.FILLED_PROPERTY, true);
        headerBoxProps.set("fillColor", Color.LIGHT_GRAY);
        this.lang.newRect(new Offset(-2, -2, header, AnimalScript.DIRECTION_NW), new Offset(2, 2, header, AnimalScript.DIRECTION_SE), "headerRect", null, headerBoxProps);

        // Slide 1: what Q-Learning is.
        SourceCode description = this.lang.newSourceCode(new Coordinates(15, 40), AnimationPropertiesKeys.TEXT_PROPERTY, null, this.textsProps);
        String[] descriptionLines = {
            "Q-Learning is a machine learning algorithm from the reinforcement learning",
            "area. The goal of the Q-Learning algorithm is to find a function Q which gives the",
            "utility of a pair (s, a) where s is a state and a is an action. To achieve that the",
            "algorithm performs an action and receives the immediate reward. The rewards",
            "received in different states are used to calculate the Q-Function.",
        };
        for (String line : descriptionLines) {
            description.addCodeLine(line, null, 0, null);
        }
        this.lang.nextStep("Introduction");
        description.hide();

        // Slide 2: outline of the algorithm plus an explanation of alpha and gamma.
        this.code = this.lang.newSourceCode(new Coordinates(15, 40), AnimationPropertiesKeys.TEXT_PROPERTY, null, this.textsProps);
        String[] outlineLines = {
            "The outline of the algorithm looks as follows:",
            "",
            "Initialize all Q(s, a) with 0",
            "Loop",
            "   Select a state s",
            "   While (s != target_state)",
            "      Select an action a and execute it",
            "      Receive the immediate reward and observe the new state s'",
            "      Update the Q-Function:",
            "         Q(s, a) := Q(s, a) + α*((r(s, a) + γmax Q(s', a')) - Q(s, a))",
            "      s := s'",
            "",
            "α is learning rate which controls how much the new value will change the function. γ is discount factor. ",
            "The purpose of discount factor is to weight immediate rewards higher than the ones further in the future",
        };
        for (String line : outlineLines) {
            this.code.addCodeLine(line, null, 0, null);
        }

        // Small "a'" label placed relative to the listing; presumably the subscript
        // of "max" in the update formula. It uses the source-code colour.
        TextProperties subscriptProps = new TextProperties();
        subscriptProps.set("font", new Font("SansSerif", Font.PLAIN, 10));
        subscriptProps.set("color", (Color) this.sourceCodeProps.get("color"));
        this.aText = this.lang.newText(new Offset(259, -70, this.code, AnimalScript.DIRECTION_SW), "a'", "aText", null, subscriptProps);

        this.lang.nextStep();
        this.code.hide();
    }

    /**
     * Shows the explanation of the grid-world example next to the pseudo-code
     * listing that is used for highlighting during the demonstration.
     *
     * <p>Side effects: replaces {@code code} with a new listing drawn with
     * {@code sourceCodeProps}. The explanation text is hidden after the step.
     */
    public void introduction2() {
        // Explanation of the example, drawn below the listing.
        SourceCode explanation = this.lang.newSourceCode(new Coordinates(40, 250), AnimationPropertiesKeys.TEXT_PROPERTY, null, this.textsProps);
        explanation.addCodeLine("We illustrate how the algorithm works using following example. The", null, 0, null);
        explanation.addCodeLine("goal is to find a shortest path to ☆ from every square while", null, 0, null);
        explanation.addCodeLine("avoiding ☠. Reaching ☆ leads to a reward of " + ((int) this.rewardFinish) + ". ", null, 0, null);
        explanation.addCodeLine("☠ corresponds to a negative reward of " + ((int) this.rewardTrap) + ", and every step costs " + ((int) this.rewardMove) + ".", null, 0, null);
        explanation.addCodeLine("The algorithm chooses an initial state randomly and performs one of", null, 0, null);
        explanation.addCodeLine("the actions which currently have the highest Q-Value (if there are ", null, 0, null);
        explanation.addCodeLine("several such actions, one of them is chosen randomly) and updates the", null, 0, null);
        explanation.addCodeLine("Q-Function until the finish state is reached. After that", null, 0, null);
        explanation.addCodeLine("another random state is chosen. ", null, 0, null);
        explanation.addCodeLine("Best actions according to the Q function are indicated with arrows.", null, 0, null);

        // Pseudo-code listing. The two leading blank lines keep the line indices
        // identical to the outline shown in introduction1().
        this.code = this.lang.newSourceCode(new Coordinates(15, 40), AnimationPropertiesKeys.TEXT_PROPERTY, null, this.sourceCodeProps);
        String[] pseudoCode = {
            "",
            "",
            "Initialize all Q(s, a) with 0",
            "Loop",
            "   Select a state s",
            "   While (s != target_state)",
            "      Select an action a and execute it",
            "      Receive the immediate reward and observe the new state s'",
            "      Update the Q-Function:",
            "         Q(s, a) := Q(s, a) + α*((r(s, a) + γmax Q(s', a')) - Q(s, a))",
            "      s := s'",
            "",
        };
        for (String line : pseudoCode) {
            this.code.addCodeLine(line, null, 0, null);
        }

        this.lang.nextStep("Example explained");
        explanation.hide();
    }

    /**
     * Hides every primitive of the demonstration (grid, Q-table, annotations,
     * listing and labels) and shows the closing remark about convergence.
     */
    public void conclusion() {
        // Grid squares.
        for (Rect[] column : this.rectField) {
            for (Rect square : column) {
                square.hide();
            }
        }
        // Q-table cells and their texts.
        for (Rect[] column : this.qFunctionTable) {
            for (Rect cell : column) {
                cell.hide();
            }
        }
        for (Text[] column : this.qFunctionText) {
            for (Text cellText : column) {
                cellText.hide();
            }
        }
        // Symbols and arrows inside the grid.
        for (Text[] column : this.textField) {
            for (Text symbol : column) {
                symbol.hide();
            }
        }
        for (Text line : this.annotation) {
            line.hide();
        }
        this.aText.hide();
        this.code.hide();
        this.qFuncLabel.hide();
        for (Text label : this.textFieldLabel) {
            label.hide();
        }

        // Closing slide.
        SourceCode summary = this.lang.newSourceCode(new Coordinates(15, 40), AnimationPropertiesKeys.TEXT_PROPERTY, null, this.textsProps);
        summary.addCodeLine("It can be shown that the Q-Function converges towards real value if each action is", null, 0, null);
        summary.addCodeLine("executed infinitely often in every state. In practice however convergence also occurs ", null, 0, null);
        summary.addCodeLine("under less strict condition.", null, 0, null);
        this.lang.nextStep("Conclusion");
    }

    /**
     * Draws the 4x3 grid world: one square per state, the finish/trap symbols
     * inside the squares, and the numeric column and row labels.
     *
     * <p>Side effects: fills {@code rectField}, {@code textField} and
     * {@code textFieldLabel}, and overwrites the font of {@code rectTextProps}.
     */
    public void initializeField() {
        final int cellSize = 40;
        final int originX = 445;
        final int originY = 70;

        // Squares, indexed [x][y].
        this.rectField = new Rect[4][3];
        for (int x = 0; x < 4; x++) {
            for (int y = 0; y < 3; y++) {
                this.rectField[x][y] = this.lang.newRect(
                        new Coordinates(originX + (x * cellSize), originY + (y * cellSize)),
                        new Coordinates(originX + ((x + 1) * cellSize), originY + ((y + 1) * cellSize)),
                        "Field_" + x + "_" + y, null, this.rectsProps);
            }
        }

        TextProperties symbolProps = new TextProperties();
        symbolProps.set("font", new Font("SansSerif", Font.PLAIN, cellSize));
        this.rectTextProps.set("font", new Font("SansSerif", Font.PLAIN, cellSize));
        symbolProps.set(AnimationPropertiesKeys.CENTERED_PROPERTY, true);
        symbolProps.set("color", Color.BLACK);
        symbolProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 1);

        this.textField = new Text[4][3];
        // Hidden throw-away text kept from the original; NOTE(review): its purpose is
        // not visible here (possibly a font/layout warm-up) - confirm before removing.
        this.lang.newText(new Offset(0, 0, this.rectField[0][0], AnimalScript.DIRECTION_N), "A", "tmp", null, this.rectTextProps).hide();
        for (int x = 0; x < 4; x++) {
            for (int y = 0; y < 3; y++) {
                String symbol = "  ";
                if (this.coordinatesFinish[0] == x && this.coordinatesFinish[1] == y) {
                    symbol = "☆";
                } else {
                    for (int[] trap : this.coordinatesTraps) {
                        if (trap[0] == x && trap[1] == y) {
                            symbol = "☠";
                        }
                    }
                }
                this.textField[x][y] = this.lang.newText(new Offset(0, 0, this.rectField[x][y], AnimalScript.DIRECTION_N), symbol, "fieldText_" + x + "_" + y, null, symbolProps);
            }
        }

        // Axis labels use the same properties with a half-size, dark grey font.
        symbolProps.set("font", new Font("SansSerif", Font.PLAIN, cellSize / 2));
        symbolProps.set("color", Color.DARK_GRAY);
        this.textFieldLabel = new Text[7];
        this.lang.newText(new Offset(0, 0, this.rectField[0][0], AnimalScript.DIRECTION_N), "A", "tmp", null, symbolProps).hide();
        // Column labels above the top row.
        for (int x = 0; x < 4; x++) {
            this.textFieldLabel[x] = this.lang.newText(new Offset(0, -27, this.rectField[x][0], AnimalScript.DIRECTION_N), Integer.toString(x), "TextFieldLabel_1_" + x, null, symbolProps);
        }
        // Row labels left of the first column. They get their own name prefix so they
        // no longer duplicate the names of the column labels.
        for (int y = 0; y < 3; y++) {
            this.textFieldLabel[y + 4] = this.lang.newText(new Offset(-12, -14, this.rectField[0][y], AnimalScript.DIRECTION_W), Integer.toString(y), "TextFieldLabel_2_" + y, null, symbolProps);
        }
    }

    /**
     * Creates the eight annotation text lines used during the demonstration.
     *
     * <p>{@code annotation[0]} initially shows alpha and gamma, entries 1-6 are
     * empty lines stacked below it, and {@code annotation[7]} is the "Iteration"
     * line placed one line above {@code annotation[0]}. Every text gets a unique
     * name ("annotation0" ... "annotation7").
     */
    private void initializeAnnotation() {
        final int fontSize = 12;
        final int lineSpacing = fontSize + 11;
        Coordinates anchor = new Coordinates(50, 270);

        TextProperties lineProps = new TextProperties();
        lineProps.set("font", new Font("SansSerif", Font.PLAIN, fontSize));
        lineProps.set("color", Color.BLACK);
        lineProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 1);

        this.annotation = new Text[8];
        this.annotation[0] = this.lang.newText(new Offset(0, 0, anchor, AnimalScript.DIRECTION_NW), "α = " + this.alpha + ",  γ = " + this.gamma, "annotation0", null, lineProps);

        // Lines 1-6: each one is placed one line below its predecessor.
        for (int k = 1; k <= 6; k++) {
            this.annotation[k] = this.lang.newText(new Offset(0, lineSpacing, this.annotation[k - 1], AnimalScript.DIRECTION_NW), "", "annotation" + k, null, lineProps);
        }

        // Iteration counter, one line above the first annotation.
        this.annotation[7] = this.lang.newText(new Offset(0, -lineSpacing, this.annotation[0], AnimalScript.DIRECTION_NW), "Iteration ", "annotation7", null, lineProps);
    }

    /**
     * Draws the Q-function table: a header row (State, left, right, up, down),
     * one row per state, and the current Q-value of every state/action pair.
     *
     * <p>The table is indexed [column][row]. Row {@code 1 + y * 4 + x} belongs to
     * the grid square (x, y) and is labelled "(y, x)". Values for which
     * {@code getQ} returns NaN are shown as "  --". The table rectangles are named
     * "QField_col_row" so they do not clash with the grid squares' "Field_x_y".
     *
     * <p>Side effects: fills {@code qFunctionTable}, {@code qFunctionText} and
     * {@code qFuncLabel}, highlights listing line 2 and ends the step.
     */
    public void initializeQTable() {
        final int columns = 5;
        final int rows = 13;
        final int cellWidth = 60;
        final int cellHeight = 26;
        final int originX = 620;
        final int originY = 50;
        final int fontSize = 13;

        this.code.highlight(2);

        RectProperties cellProps = new RectProperties();
        cellProps.set("color", Color.GRAY);
        cellProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 2);
        cellProps.set(AnimationPropertiesKeys.FILLED_PROPERTY, true);
        cellProps.set("fillColor", Color.WHITE);

        // Cells, column by column; the header cell of each column is light grey.
        this.qFunctionTable = new Rect[columns][rows];
        for (int col = 0; col < columns; col++) {
            for (int row = 0; row < rows; row++) {
                this.qFunctionTable[col][row] = this.lang.newRect(
                        new Coordinates(originX + (col * cellWidth), originY + (row * cellHeight)),
                        new Coordinates(originX + ((col + 1) * cellWidth), originY + ((row + 1) * cellHeight)),
                        "QField_" + col + "_" + row, null, cellProps);
            }
            this.qFunctionTable[col][0].changeColor("fillColor", Color.LIGHT_GRAY, null, null);
        }

        TextProperties cellTextProps = new TextProperties();
        cellTextProps.set("font", new Font("SansSerif", Font.PLAIN, fontSize));
        cellTextProps.set("color", Color.BLACK);
        cellTextProps.set(AnimationPropertiesKeys.DEPTH_PROPERTY, 1);
        cellTextProps.set(AnimationPropertiesKeys.CENTERED_PROPERTY, true);
        // Hidden throw-away text kept from the original; NOTE(review): purpose not visible here.
        this.lang.newText(new Offset(0, 0, this.qFunctionTable[0][0], AnimalScript.DIRECTION_N), "A", "tmp", null, cellTextProps).hide();

        // Header row.
        this.qFunctionText = new Text[columns][rows];
        String[] headers = {"State", "left", AnimationPropertiesKeys.RIGHT_PROPERTY, "up", "down"};
        for (int col = 0; col < columns; col++) {
            this.qFunctionText[col][0] = this.lang.newText(new Offset(0, 5, this.qFunctionTable[col][0], AnimalScript.DIRECTION_N), headers[col], "", null, cellTextProps);
        }

        // State column.
        for (int x = 0; x < 4; x++) {
            for (int y = 0; y < 3; y++) {
                int row = 1 + (y * 4) + x;
                this.qFunctionText[0][row] = this.lang.newText(new Offset(0, 5, this.qFunctionTable[0][row], AnimalScript.DIRECTION_N), "(" + y + ", " + x + ")", "", null, cellTextProps);
                this.qFunctionTable[0][row].changeColor("fillColor", this.tableStateColor, null, null);
            }
        }

        // Q-values: column c holds action c - 1, row r holds state ((r - 1) % 4, (r - 1) / 4).
        for (int col = 1; col < columns; col++) {
            for (int row = 1; row < rows; row++) {
                double q = getQ((row - 1) % 4, (row - 1) / 4, col - 1);
                String shown = Double.isNaN(q) ? "  --" : String.valueOf(round(q));
                // Created with a blank placeholder first, then set to the real value.
                this.qFunctionText[col][row] = this.lang.newText(new Offset(-8, 5, this.qFunctionTable[col][row], AnimalScript.DIRECTION_N), "       ", "", null, cellTextProps);
                this.qFunctionText[col][row].setText(shown, null, null);
            }
        }

        // Bold caption above the middle column.
        cellTextProps.set("font", new Font("SansSerif", Font.BOLD, fontSize));
        this.qFuncLabel = this.lang.newText(new Offset(0, -fontSize - 4, this.qFunctionTable[2][0], AnimalScript.DIRECTION_N), "Q-Function", "tmp", null, cellTextProps);
        this.lang.nextStep("Demonstration");
    }

    /**
     * Refreshes the arrows drawn inside the grid squares.
     *
     * <p>The finish square and trap squares are left untouched. For every other
     * square the available action with the strictly highest Q-value is shown as
     * an arrow. If the highest value is shared by several actions (or equals
     * negative infinity) the square shows a single blank instead. A text is only
     * rewritten when it actually changes.
     */
    private void showArrows() {
        // Arrow glyph per action index: 0 = left, 1 = right, 2 = up, 3 = down.
        final String[] arrows = {"←", "→", "↑", "↓"};

        for (int x = 0; x < 4; x++) {
            for (int y = 0; y < 3; y++) {
                boolean terminal = this.coordinatesFinish[0] == x && this.coordinatesFinish[1] == y;
                for (int[] trap : this.coordinatesTraps) {
                    if (trap[0] == x && trap[1] == y) {
                        terminal = true;
                    }
                }
                if (terminal) {
                    continue;
                }

                // Find the best action and detect whether that maximum is shared.
                double bestValue = Double.NEGATIVE_INFINITY;
                int bestAction = -1;
                boolean tie = false;
                for (Integer action : actionsAvailable(x, y)) {
                    double value = getQ(x, y, action.intValue());
                    if (value > bestValue) {
                        bestAction = action.intValue();
                        bestValue = value;
                        tie = false;
                    } else if (value == bestValue) {
                        tie = true;
                    }
                }

                if (tie) {
                    if (!this.textField[x][y].getText().equals(" ")) {
                        this.textField[x][y].setText(" ", null, null);
                    }
                } else if (bestAction >= 0 && bestAction < arrows.length) {
                    String arrow = arrows[bestAction];
                    if (!this.textField[x][y].getText().equals(arrow)) {
                        this.textField[x][y].setText(arrow, null, null);
                    }
                }
            }
        }
    }

    /**
     * Runs the demonstration: moves the listing highlight from line 2 to line 4
     * and repeats {@code learnCycle()} until {@code learningSteps} exceeds
     * {@code numberOfUpdates}. The bound is inclusive. {@code iteration} is
     * incremented after every cycle.
     */
    public void learn() {
        this.code.unhighlight(2);
        this.code.highlight(4);
        for (; this.learningSteps <= this.numberOfUpdates; this.iteration++) {
            learnCycle();
        }
    }

    private void learnCycle() {
        Random random = new Random();
        int[] iArr = {random.nextInt(4), random.nextInt(3)};
        Color color = (Color) this.rectsProps.get("fillColor");
        this.vars.set("iteration", new StringBuilder().append(this.iteration).toString());
        this.annotation[7].setText("Iteration " + this.iteration, null, null);
        this.annotation[0].setText("s := (" + iArr[1] + ", " + iArr[0] + ")", null, null);
        this.rectField[iArr[0]][iArr[1]].changeColor("fillColor", this.currentStateColor, null, null);
        this.qFunctionTable[0][(iArr[1] * 4) + iArr[0] + 1].changeColor("fillColor", this.currentStateColor, null, null);
        this.lang.nextStep("Iteration " + this.iteration);
        if (iArr[0] == this.coordinatesFinish[0] && iArr[1] == this.coordinatesFinish[1]) {
            this.rectField[iArr[0]][iArr[1]].changeColor("fillColor", color, null, null);
            this.qFunctionTable[0][(iArr[1] * 4) + iArr[0] + 1].changeColor("fillColor", this.tableStateColor, null, null);
        }
        for (int i = 0; i <= 20 && this.learningSteps <= this.numberOfUpdates; i++) {
            LinkedList linkedList = null;
            double d = Double.NEGATIVE_INFINITY;
            Iterator<Integer> it = actionsAvailable(iArr[0], iArr[1]).iterator();
            while (it.hasNext()) {
                Integer next = it.next();
                if (getQ(iArr[0], iArr[1], next.intValue()) > d) {
                    linkedList = new LinkedList();
                    linkedList.add(next);
                    d = getQ(iArr[0], iArr[1], next.intValue());
                } else if (getQ(iArr[0], iArr[1], next.intValue()) == d) {
                    linkedList.add(next);
                }
            }
            int intValue = ((Integer) linkedList.get(random.nextInt(linkedList.size()))).intValue();
            int[] stateAfterAction = stateAfterAction(iArr, intValue);
            double d2 = Double.NEGATIVE_INFINITY;
            Iterator<Integer> it2 = actionsAvailable(stateAfterAction[0], stateAfterAction[1]).iterator();
            while (it2.hasNext()) {
                Integer next2 = it2.next();
                if (getQ(stateAfterAction[0], stateAfterAction[1], next2.intValue()) > d2) {
                    d2 = getQ(stateAfterAction[0], stateAfterAction[1], next2.intValue());
                }
            }
            if (iArr[0] == 1 && iArr[1] == 0) {
                return;
            }
            double q = getQ(iArr[0], iArr[1], intValue);
            double d3 = q + (this.alpha * ((this.rewards[stateAfterAction[0]][stateAfterAction[1]] + (this.gamma * d2)) - q));
            showArrows();
            setQ(iArr[0], iArr[1], intValue, d3);
            this.learningSteps++;
            String str = intValue <= 1 ? intValue == 0 ? "left" : AnimationPropertiesKeys.RIGHT_PROPERTY : intValue == 2 ? "up" : "down";
            String str2 = "Q((" + iArr[1] + ", " + iArr[0] + "), " + str + ") := " + round(q) + " + " + this.alpha + " * (" + this.rewards[stateAfterAction[0]][stateAfterAction[1]] + " + " + this.gamma + " * " + round(d2);
            String str3 = String.valueOf(q >= CMAESOptimizer.DEFAULT_STOPFITNESS ? String.valueOf(str2) + " - " + round(q) : String.valueOf(str2) + " - (" + round(q) + ")") + ") = " + round(d3);
            this.annotation[1].setText("a := " + str, null, null);
            this.rectField[stateAfterAction[0]][stateAfterAction[1]].changeColor("fillColor", Color.PINK, null, null);
            this.qFunctionTable[intValue + 1][0].changeColor("fillColor", Color.PINK, null, null);
            this.code.unhighlight(4);
            if (this.learningSteps <= this.detailedIterations) {
                this.code.unhighlight(10);
                this.code.highlight(6);
                this.lang.nextStep();
                this.code.unhighlight(6);
            } else {
                this.code.highlight(5);
            }
            this.annotation[2].setText("r((" + iArr[1] + ", " + iArr[0] + "), " + str + ") = " + this.rewards[stateAfterAction[0]][stateAfterAction[1]], null, null);
            this.annotation[3].setText("s' = (" + stateAfterAction[1] + ", " + stateAfterAction[0] + ")", null, null);
            if (this.learningSteps <= this.detailedIterations) {
                this.code.highlight(7);
                this.lang.nextStep();
                this.code.unhighlight(7);
                this.code.highlight(8);
                this.code.highlight(9);
                this.aText.changeColor("color", (Color) this.sourceCodeProps.get(AnimationPropertiesKeys.HIGHLIGHTCOLOR_PROPERTY), null, null);
            }
            this.qFunctionTable[intValue + 1][(iArr[1] * 4) + iArr[0] + 1].changeColor("fillColor", this.qUpdateColor, null, null);
            this.annotation[4].setText(str3, null, null);
            this.qFunctionText[intValue + 1][1 + (iArr[1] * 4) + iArr[0]].setText(new StringBuilder(String.valueOf(round(d3))).toString(), null, null);
            showArrows();
            this.lang.nextStep();
            this.qFunctionTable[intValue + 1][(iArr[1] * 4) + iArr[0] + 1].changeColor("fillColor", Color.WHITE, null, null);
            if (this.learningSteps <= this.detailedIterations) {
                this.code.unhighlight(8);
                this.code.unhighlight(9);
                this.aText.changeColor("color", (Color) this.sourceCodeProps.get("color"), null, null);
                this.code.highlight(10);
            }
            this.annotation[0].setText("s := (" + stateAfterAction[0] + ", " + stateAfterAction[1] + ")", null, null);
            this.annotation[1].setText("", null, null);
            this.annotation[2].setText("", null, null);
            this.annotation[3].setText("", null, null);
            this.annotation[4].setText("", null, null);
            this.rectField[iArr[0]][iArr[1]].changeColor("fillColor", color, null, null);
            this.qFunctionTable[intValue + 1][0].changeColor("fillColor", Color.LIGHT_GRAY, null, null);
            this.qFunctionTable[0][(iArr[1] * 4) + iArr[0] + 1].changeColor("fillColor", this.tableStateColor, null, null);
            this.rectField[stateAfterAction[0]][stateAfterAction[1]].changeColor("fillColor", this.currentStateColor, null, null);
            this.qFunctionTable[0][(stateAfterAction[1] * 4) + stateAfterAction[0] + 1].changeColor("fillColor", this.currentStateColor, null, null);
            if (this.learningSteps <= this.detailedIterations) {
                this.lang.nextStep();
            }
            if ((stateAfterAction[0] == this.coordinatesFinish[0] && stateAfterAction[1] == this.coordinatesFinish[1]) || i == 20) {
                this.code.unhighlight(10);
                if (this.learningSteps > this.detailedIterations) {
                    this.annotation[0].setText("s := (" + stateAfterAction[0] + ", " + stateAfterAction[1] + ")", null, null);
                    this.annotation[1].setText("", null, null);
                    this.annotation[2].setText("", null, null);
                    this.annotation[3].setText("", null, null);
                    this.annotation[4].setText("", null, null);
                    this.rectField[stateAfterAction[0]][stateAfterAction[1]].changeColor("fillColor", this.currentStateColor, null, null);
                    this.lang.nextStep();
                    this.code.unhighlight(5);
                }
                this.code.highlight(4);
                this.rectField[stateAfterAction[0]][stateAfterAction[1]].changeColor("fillColor", color, null, null);
                this.qFunctionTable[0][(stateAfterAction[1] * 4) + stateAfterAction[0] + 1].changeColor("fillColor", this.tableStateColor, null, null);
            }
            if (this.learningSteps == this.detailedIterations) {
                this.code.unhighlight(10);
            }
            iArr = stateAfterAction;
        }
    }

    /**
     * Computes the grid position reached by applying an action code to a position.
     *
     * <p>Action 0 decrements component 0, action 1 increments component 0,
     * action 2 decrements component 1 and action 3 increments component 1.
     * Any other action code yields an unchanged copy. No bounds checking is
     * done here, and the input array is never modified.
     *
     * @param iArr current position; only elements 0 and 1 are read
     * @param i action code
     * @return a new two-element array holding the resulting position
     */
    private int[] stateAfterAction(int[] iArr, int i) {
        int first = iArr[0];
        int second = iArr[1];
        switch (i) {
            case 0:
                first--;
                break;
            case 1:
                first++;
                break;
            case 2:
                second--;
                break;
            case 3:
                second++;
                break;
            default:
                // Unknown action codes leave the position untouched.
                break;
        }
        return new int[] {first, second};
    }

    /**
     * Looks up the stored Q-value for an action in a given cell.
     *
     * @param i first cell index, passed to {@code actionsAvailable}
     * @param i2 second cell index, passed to {@code actionsAvailable}
     * @param i3 action code
     * @return the value stored in {@code qFunction[i][i2][i3]} when
     *         {@code actionsAvailable(i, i2)} contains the action,
     *         otherwise {@link Double#NaN}
     */
    private double getQ(int i, int i2, int i3) {
        // Actions that are not available in this cell have no Q-value.
        if (!actionsAvailable(i, i2).contains(Integer.valueOf(i3))) {
            return Double.NaN;
        }
        return this.qFunction[i][i2][i3];
    }

    /**
     * Stores a Q-value for an action in a given cell.
     *
     * <p>Unlike {@code getQ}, this performs no availability check; the value
     * is written unconditionally.
     *
     * @param i first cell index
     * @param i2 second cell index
     * @param i3 action code
     * @param d value to store in {@code qFunction[i][i2][i3]}
     */
    private void setQ(int i, int i2, int i3, double d) {
        final double[] valuesForCell = this.qFunction[i][i2];
        valuesForCell[i3] = d;
    }

    /**
     * Lists the action codes that may be taken from a given cell.
     *
     * <p>Action 0 is offered when {@code i > 0}, action 1 when {@code i < 3},
     * action 3 when {@code i2 < 2} and action 2 when {@code i2 > 0}. The
     * returned list always uses the order 0, 1, 3, 2 (skipping actions that
     * are not offered).
     *
     * @param i first cell index
     * @param i2 second cell index
     * @return a fresh list of the offered action codes
     */
    private LinkedList<Integer> actionsAvailable(int i, int i2) {
        // Indexed by action code: whether that action is offered in this cell.
        final boolean[] offered = {i > 0, i < 3, i2 > 0, i2 < 2};
        // Insertion order of the result list; note that 3 precedes 2.
        final int[] insertionOrder = {0, 1, 3, 2};

        LinkedList<Integer> actions = new LinkedList<>();
        for (int action : insertionOrder) {
            if (offered[action]) {
                actions.add(action);
            }
        }
        return actions;
    }

    /**
     * Rounds a value to two decimal places.
     *
     * <p>The value is scaled by 100, rounded to the nearest whole number with
     * {@link Math#round(double)}, and scaled back using floating-point
     * division so the two decimals are kept.
     *
     * @param d value to round
     * @return {@code d} rounded to two decimal places
     */
    private static double round(double d) {
        final long factor = (long) Math.pow(10.0d, 2.0d);
        // Cast before dividing: long / long would discard the fractional part.
        return (double) Math.round(d * factor) / factor;
    }
}
