
import { EntryBody } from "../components/Entry/EntryBody";

/**
 * Entry data for the post "The Boring God Theory of Language Modeling".
 *
 * Fields:
 * - `id`    — string identifier; matches this export's name.
 * - `title` — JSX fragment holding the entry title.
 * - `date`  — display date as a plain string.
 * - `Body`  — an `EntryBody` element whose `paragraphs` prop is an array of
 *             monospace `<div>`s. Empty `<div>`s alternate with the text ones;
 *             presumably they act as vertical spacers — confirm against
 *             `EntryBody` before removing them.
 */
export const boring_god_theory: { [id: string]: any } = {

    id: "boring_god_theory",
    title: <>The Boring God Theory of Language Modeling</>,
    date: "Octubre 2023",

    Body: (
        <EntryBody
        paragraphs={[

// Leading empty paragraph (kept from the original layout).
<div className="font-mono">


</div>,

<div className="font-mono">
The current paradigm of language models is to build a probability distribution on the next word given a context and a corpus.

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
As corpus and models become bigger, we expect this probability distribution to strongly suggest only true possible continuations.

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
In particular, the cross-entropy loss used in language modeling minimizes the amount of bits needed for the (probability generated by the) model to "communicate" the true event (true word in the training corpus). In other words, we are minimizing the information of the next word (wrt the corpus).

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
What does a Quasi-Ideal LM look like then?! Always predicting the most likely word given the Quasi-Ideal Corpus of Humanity?!

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
What glorious things can such modeler of such corpus say, an Omnipotent Being in Language!

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
But... what is the price? Always minimizing information, always continuing the text in the most irrelevant way... Never an unlikely word or a twist...

</div>,

<div className="font-mono">


</div>,

<div className="font-mono">
Insufferable! Utterly boring!!

</div>,

<div className="font-mono">


</div>,

// The closing sentence is left unfinished on purpose; do not "complete" it.
<div className="font-mono">
Trained to generate sentences containing the most minuscule amount of information possible, the AGI could not even die of boredom, but had to keep on

</div>,

      ]}
    />
  ),
};
