# Randomly initialised weight matrices: each entry is a standard-normal draw
# multiplied by a fixed scale factor.
# NOTE(review): the names and shapes suggest the transition (A), input (B) and
# readout (C) matrices of a linear state-space model -- confirm against the
# code that consumes them.
self.A = np.random.randn(state_dim, state_dim) * 0.01  # shape (state_dim, state_dim)
self.B = np.random.randn(state_dim, embed_dim) * 0.1  # shape (state_dim, embed_dim)
self.C = np.random.randn(vocab_size, state_dim) * 0.1  # shape (vocab_size, state_dim)
...  # placeholder carried over unchanged from the original line