Internals

`cmd.py`

Module containing the implementation for the kebbie command line.

`instantiate_correctors(keyboard, get_layout=True, fast_mode=True, instantiate_emulator=True)`

Create the right correctors (with the right platform, etc...) given the arguments from the command line.

Parameters:

Name	Type	Description	Default
`keyboard`	`str`	Name of the keyboard to load.	required
`fast_mode`	`bool`	If `True`, the corrector will be instantiated in fast mode (only AC).	`True`
`instantiate_emulator`	`bool`	If `True`, the emulators are instantiated (which triggers the layout detection). If `False`, only the corrector is instantiated, not the emulator.	`True`
`get_layout`	`bool`	If `True`, the keyboard keys and suggestions will be mapped and shown on screen.	`True`

Returns:

Type	Description
`List[EmulatorCorrector]`	The list of created Correctors.

Source code in kebbie/cmd.py

def instantiate_correctors(
    keyboard: str, get_layout: bool = True, fast_mode: bool = True, instantiate_emulator: bool = True
) -> List[EmulatorCorrector]:
    """Build one corrector per running device for the requested keyboard.

    The keyboard name decides the platform: `gboard`, `tappa`, `swiftkey` and
    `yandex` are handled as Android keyboards, every other name is handled as
    an iOS keyboard.

    Args:
        keyboard (str): Name of the keyboard to load.
        get_layout (bool, optional): Forwarded to each corrector. If `True`,
            the keyboard keys and suggestions will be mapped and shown on
            screen.
        fast_mode (bool, optional): If `True`, the correctors are
            instantiated in fast mode (only AC).
        instantiate_emulator (bool, optional): If `True`, the emulators are
            instantiated (which triggers the layout detection). If `False`,
            only the correctors are instantiated, not the emulators.

    Returns:
        The list of created Correctors, one per detected device.
    """
    # Options that are the same for every corrector, whatever the platform
    shared_options = {
        "keyboard": keyboard,
        "fast_mode": fast_mode,
        "instantiate_emulator": instantiate_emulator,
        "get_layout": get_layout,
    }

    if keyboard not in ["gboard", "tappa", "swiftkey", "yandex"]:
        # iOS keyboards : the device is identified by its index in the list
        # of booted simulators, along with its name and platform version
        ios_correctors = []
        for index, (platform_version, device_name) in enumerate(Emulator.get_ios_devices()):
            ios_correctors.append(
                EmulatorCorrector(
                    device=index,
                    platform="ios",
                    ios_name=device_name,
                    ios_platform=platform_version,
                    **shared_options,
                )
            )
        return ios_correctors

    # Android keyboards : the device is identified by its UDID
    android_correctors = []
    for udid in Emulator.get_android_devices():
        android_correctors.append(EmulatorCorrector(device=udid, platform="android", **shared_options))
    return android_correctors

`common_args(parser)`

Add common arguments to the given parser.

Parameters:

Name	Type	Description	Default
`parser`	`ArgumentParser`	Parser where to add the arguments.	required

Source code in kebbie/cmd.py

def common_args(parser: argparse.ArgumentParser):
    """Register the options shared by all sub-commands on the given parser.

    For now this is only the mandatory `--keyboard` / `-K` option, which
    selects the keyboard under test among the supported ones.

    Args:
        parser (argparse.ArgumentParser): Parser where to add the arguments.
    """
    supported_keyboards = ["gboard", "ios", "kbkitpro", "kbkitoss", "tappa", "fleksy", "swiftkey", "yandex"]
    keyboard_option = {
        "dest": "keyboard",
        "type": str,
        "required": True,
        "choices": supported_keyboards,
        "help": "Which keyboard, to be tested, is currently installed on the emulator.",
    }
    parser.add_argument("--keyboard", "-K", **keyboard_option)

`cli()`

Entry-point of the kebbie command line.

Source code in kebbie/cmd.py

def _build_cli_parser() -> argparse.ArgumentParser:
    """Build the parser of the `kebbie` command line and its three sub-commands."""
    # create the top-level parser
    parser = argparse.ArgumentParser(description="Kebbie's command line.")
    subparsers = parser.add_subparsers(title="commands", dest="cmd")

    evaluate_parser = subparsers.add_parser("evaluate", help="Run the evaluation using emulated keyboard.")
    evaluate_parser.set_defaults(cmd="evaluate")
    common_args(evaluate_parser)
    evaluate_parser.add_argument(
        "--result_file",
        "-R",
        dest="result_file",
        type=str,
        default="results.json",
        help="Where to save the results of the evaluation",
    )
    evaluate_parser.add_argument(
        "--all_tasks",
        "-A",
        dest="all_tasks",
        action="store_true",
        default=False,
        help="If specified, all tasks are evaluated (not only auto-correction, but also auto-completion and "
        "next-word prediction).",
    )
    evaluate_parser.add_argument(
        "--n_sentences",
        "-N",
        dest="n_sentences",
        type=int,
        default=100,
        help="The number of sentences to use for the evaluation. Emulated keyboard are slow, so we can't run on the "
        "full test set. Instead we pick the first N sentences.",
    )
    evaluate_parser.add_argument(
        "--track_mistakes",
        "-T",
        dest="track_mistakes",
        action="store_true",
        default=False,
        help="If specified, mistakes will be tracked and saved in the result file.",
    )

    layout_parser = subparsers.add_parser(
        "show_layout", help="Display the layout over the keyboard for debugging purpose."
    )
    layout_parser.set_defaults(cmd="show_layout")
    common_args(layout_parser)

    page_source_parser = subparsers.add_parser(
        "get_page_source", help="Save the page source of the keyboard in a file for debugging purpose."
    )
    page_source_parser.set_defaults(cmd="get_page_source")
    common_args(page_source_parser)
    page_source_parser.add_argument(
        "--page_source_file",
        "-F",
        dest="page_source_file",
        type=str,
        default="keyboard_page_source.xml",
        help="Where to save the keyboard page source",
    )
    page_source_parser.add_argument(
        "--print_page_source",
        "-P",
        dest="print_page_source",
        action="store_true",
        default=False,
        help="If specified, the page source will be shown in console too.",
    )

    return parser


def _keep_package_elements(page_source, keyboard_package: str):
    """Keep only the direct children of the page source that belong to the given package.

    If no child carries this package, the page source is returned unchanged.
    """
    filtered_elements = [element for element in page_source if element.get("package") == keyboard_package]
    if not filtered_elements:
        return page_source

    # Rebuild a root with the same tag / attributes, holding only the matching elements
    filtered_page_source = ET.Element(page_source.tag, page_source.attrib)
    filtered_page_source.extend(filtered_elements)
    return filtered_page_source


def cli():
    """Entry-point of the `kebbie` command line.

    Parses the command line and runs the requested sub-command :

    * `evaluate` : run the evaluation on the emulated keyboard and save the
      results as JSON.
    * `show_layout` : display the detected layout and the current predictions.
    * `get_page_source` : dump the page source of the keyboard into a file.

    If no sub-command is given, the help is printed on stderr and the process
    exits with status 1.
    """
    parser = _build_cli_parser()
    args = parser.parse_args()

    if args.cmd is None:
        parser.print_help(sys.stderr)
        sys.exit(1)
    elif args.cmd == "evaluate":
        # The emulators are not instantiated here (`instantiate_emulator=False`)
        correctors = instantiate_correctors(args.keyboard, fast_mode=not args.all_tasks, instantiate_emulator=False)

        # Get dataset, and filter it to keep only a small number of sentences
        dataset = get_soda_dataset(args.n_sentences)

        # Run the evaluation
        results = evaluate(correctors, dataset=dataset, track_mistakes=args.track_mistakes)

        # Save the results in a file
        with open(args.result_file, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=4)

        print("Overall score : ", results["overall_score"])

    elif args.cmd == "show_layout":
        correctors = instantiate_correctors(args.keyboard)
        for c in correctors:
            c.emulator.show_keyboards()
            print(f"Predictions : {c.emulator.get_predictions()}")

    elif args.cmd == "get_page_source":
        correctors = instantiate_correctors(args.keyboard, get_layout=False)

        # The keyboard package name is the same for every device : look it up once
        keyboard_package = emulator.KEYBOARD_PACKAGE.get(args.keyboard, None)

        for c in correctors:
            # Get the page source
            page_source = ET.fromstring(c.emulator.driver.page_source)

            if keyboard_package:
                # Filter elements that have the specified package
                page_source = _keep_package_elements(page_source, keyboard_package)

            page_source_str = ET.tostring(page_source, encoding="utf8").decode("utf8")

            # Print the keyboard elements to the console if specified
            if args.print_page_source:
                print(page_source_str)

            # Save the keyboard elements to a file
            # Note : the same file is opened in "w" mode for each device, so
            # with several devices only the last page source remains
            with open(args.page_source_file, "w", encoding="utf-8") as file:
                file.write(page_source_str)

`correctors.py`

Module containing the base Corrector class.

`EmulatorCorrector`

Bases: Corrector

Corrector using an emulated keyboard.

Parameters:

Name	Type	Description	Default
`platform`	`str`	Name of the platform used. `android` or `ios`.	required
`keyboard`	`str`	Name of the keyboard to test.	required
`device`	`str`	Device UDID to use for the emulator.	`None`
`fast_mode`	`bool`	If `True`, only auto-correction will be tested, and suggestions will not be retrieved. This is faster because we don't take screenshot and run the OCR.	`True`
`instantiate_emulator`	`bool`	If `False`, the emulator is not initialized (It will only be initialized after being pickled). This is useful to quickly create instances of this class, without going through the whole layout detection (which takes time) 2 times : at initialization and after being pickled.	`True`

Source code in kebbie/correctors.py

class EmulatorCorrector(Corrector):
    """Corrector using an emulated keyboard.

    Args:
        platform (str): Name of the platform used. `android` or `ios`.
        keyboard (str): Name of the keyboard to test.
        device (str): Device identifier to use for the emulator, forwarded
            as-is to the `Emulator`.
        fast_mode (bool): If `True`, only auto-correction will be tested,
            and suggestions will not be retrieved. This is faster because
            we don't take screenshot and run the OCR.
        ios_name (str): Name of the iOS device, forwarded to the `Emulator`.
        ios_platform (str): Platform of the iOS device, forwarded to the
            `Emulator`.
        instantiate_emulator (bool): If `False`, the emulator is not
            initialized (It will only be initialized after being pickled).
            This is useful to quickly create instances of this class,
            without going through the whole layout detection (which takes
            time) 2 times : at initialization and after being pickled.
        get_layout (bool): Forwarded to the `Emulator` as its `get_layout`
            argument.
    """

    def __init__(
        self,
        platform: str,
        keyboard: str,
        device: str = None,
        fast_mode: bool = True,
        ios_name: str = None,
        ios_platform: str = None,
        instantiate_emulator: bool = True,
        get_layout: bool = True,
    ):
        super().__init__()

        # Keep the constructor arguments around : they are needed to re-create
        # the object when it is pickled (see `__reduce__()`)
        self.platform = platform
        self.keyboard = keyboard
        self.device = device
        self.fast_mode = fast_mode
        self.ios_name = ios_name
        self.ios_platform = ios_platform
        self.get_layout = get_layout

        self.emulator = None
        if instantiate_emulator:
            self.emulator = Emulator(
                self.platform,
                self.keyboard,
                device=self.device,
                ios_name=self.ios_name,
                ios_platform=self.ios_platform,
                get_layout=self.get_layout,
            )

        # Typing on keyboard is slow. Because we go through several AC calls
        # in one sentence, keep track of the previously typed context, so we
        # can just type the remaining characters
        self.previous_context = ""

    def __reduce__(self) -> Tuple:
        """This method simply makes the object picklable.

        Only the constructor arguments are serialized, not the emulator
        itself : the object is rebuilt by calling the class again, with
        `instantiate_emulator=True` so that the emulator is created at that
        point.

        Returns:
            Tuple of callable and arguments.
        """
        return (
            self.__class__,
            (
                self.platform,
                self.keyboard,
                self.device,
                self.fast_mode,
                self.ios_name,
                self.ios_platform,
                True,  # instantiate_emulator : always create the emulator when rebuilding
                self.get_layout,  # preserved, instead of silently falling back to the default
            ),
        )

    def cached_type(self, context: str, word: str):
        """This class keeps track of the content of the context currently
        typed in the emulator. This method uses this current context to
        determine if we need to retype the sentence or not. Instead of
        always erasing the content being typed, we can directly type the
        remaining characters, which saves time.

        Args:
            context (str): Context to paste.
            word (str): Word to type.
        """
        sentence = context + word
        if sentence.startswith(self.previous_context):
            # The sentence to type starts like the previous context
            # Don't retype everything, just what we need
            self.emulator.type_characters(sentence[len(self.previous_context) :])
        else:
            # The previous context is not right, erase everything and type it
            self.emulator.paste(context)
            self.emulator.type_characters(word)
        self.previous_context = sentence

    def auto_correct(
        self,
        context: str,
        keystrokes: List[Optional[Tuple[float, float]]],
        word: str,
    ) -> List[str]:
        """Implementation of `auto_correct` method for emulated keyboards.

        Args:
            context (str): String representing the previously typed characters
                (the beginning of the sentence basically).
            keystrokes (List[Optional[Tuple[float, float]]]): List of positions
                (x and y coordinates) for each keystroke of the word being
                typed. Not used by this implementation.
            word (str): Word being typed (corresponding to the keystrokes).

        Returns:
            The list of correction candidates.
        """
        self.cached_type(context, word)
        candidates = self.emulator.get_predictions() if not self.fast_mode else []

        candidates = [c for c in candidates if c != ""]

        # On keyboard, the leftmost candidate is the word being typed without
        # any change. If the word doesn't have a typo, this first candidate
        # should be kept as the auto-correction, but if the word has a typo,
        # we should remove it from the candidates list (as it will be
        # auto-corrected).
        # In order to know if it will be auto-corrected or not, we have no
        # choice but type a space and retrieve the current text to see if it
        # was auto-corrected or not.
        self.emulator.type_characters(" ")
        self.previous_context = self.emulator.get_text()
        autocorrection = self.previous_context[len(context) :].strip()

        if len(candidates) == 0:
            candidates = [autocorrection]
        elif candidates[0] != autocorrection:
            candidates.pop(0)
            if autocorrection not in candidates:
                candidates.insert(0, autocorrection)

        return candidates

    def auto_complete(
        self,
        context: str,
        keystrokes: List[Optional[Tuple[float, float]]],
        partial_word: str,
    ) -> List[str]:
        """Implementation of `auto_complete` method for emulated keyboards.

        Args:
            context (str): String representing the previously typed characters
                (the beginning of the sentence basically).
            keystrokes (List[Optional[Tuple[float, float]]]): List of positions
                (x and y coordinates) for each keystroke of the word being
                typed. Not used by this implementation.
            partial_word (str): Partial word being typed (corresponding to the
                keystrokes).

        Returns:
            The list of completion candidates. Always empty in fast mode.
        """
        if self.fast_mode:
            return []

        self.cached_type(context, partial_word)
        candidates = self.emulator.get_predictions()

        candidates = [c for c in candidates if c != ""]

        return candidates

    def predict_next_word(self, context: str) -> List[str]:
        """Implementation of `predict_next_word` method for emulated keyboards.

        Args:
            context (str): String representing the previously typed characters
                (the beginning of the sentence basically). Outside of fast
                mode, it is expected to end with a space.

        Returns:
            The list of next-word candidates. Always empty in fast mode.
        """
        if self.fast_mode:
            return []

        # In order to get the predictions, the space should be typed
        assert context[-1] == " "
        self.cached_type(context[:-1], " ")
        candidates = self.emulator.get_predictions()
        candidates = [c for c in candidates if c != ""]

        return candidates

`__reduce__()`

This method simply makes the object picklable.

Returns:

Type	Description
`Tuple`	Tuple of callable and arguments.

Source code in kebbie/correctors.py

def __reduce__(self) -> Tuple:
    """This method simply makes the object picklable.

    Only the constructor arguments are serialized : the object is rebuilt by
    calling the class again with them. `instantiate_emulator` is passed as
    `True`, and `get_layout` is carried over so the rebuilt object keeps the
    same setting.

    Returns:
        Tuple of callable and arguments.
    """
    return (
        self.__class__,
        (
            self.platform,
            self.keyboard,
            self.device,
            self.fast_mode,
            self.ios_name,
            self.ios_platform,
            True,  # instantiate_emulator
            self.get_layout,
        ),
    )

`cached_type(context, word)`

This class keeps track of the content of the context currently typed in the emulator. This method uses this current context to determine if we need to retype the sentence or not. Instead of always erasing the content being typed, we can directly type the remaining characters, which saves up time.

Parameters:

Name	Type	Description	Default
`context`	`str`	Context to paste.	required
`word`	`str`	Word to type.	required

Source code in kebbie/correctors.py

def cached_type(self, context: str, word: str):
    """Type `context + word` in the emulator, reusing what is already there.

    The content previously typed is remembered in `previous_context`. When
    the requested sentence simply extends it, only the missing characters
    are typed. Otherwise the context is pasted again and the word is typed
    after it.

    Args:
        context (str): Context to paste.
        word (str): Word to type.
    """
    target = context + word
    already_typed = self.previous_context

    if not target.startswith(already_typed):
        # What is in the field can't be reused : start over from the context
        self.emulator.paste(context)
        self.emulator.type_characters(word)
    else:
        # Only the tail of the sentence is missing
        self.emulator.type_characters(target[len(already_typed) :])

    self.previous_context = target

`auto_correct(context, keystrokes, word)`

Implementation of auto_correct method for emulated keyboards.

Parameters:

Name	Type	Description	Default
`context`	`str`	String representing the previously typed characters (the beginning of the sentence basically).	required
`keystrokes`	`List[Optional[Tuple[float, float]]]`	List of positions (x and y coordinates) for each keystroke of the word being typed.	required
`word`	`str`	Word being typed (corresponding to the keystrokes).	required

Returns:

Type	Description
`List[str]`	The list of correction candidates.

Source code in kebbie/correctors.py

def auto_correct(
    self,
    context: str,
    keystrokes: List[Optional[Tuple[float, float]]],
    word: str,
) -> List[str]:
    """Implementation of `auto_correct` method for emulated keyboards.

    The word is typed, the suggestions are read (unless in fast mode), then a
    space is typed so that the text actually kept by the keyboard can be
    read back from the typing field.

    Args:
        context (str): String representing the previously typed characters
            (the beginning of the sentence basically).
        keystrokes (List[Optional[Tuple[float, float]]]): List of positions
            (x and y coordinates) for each keystroke of the word being
            typed. Not used by this implementation.
        word (str): Word being typed (corresponding to the keystrokes).

    Returns:
        The list of correction candidates.
    """
    self.cached_type(context, word)

    if self.fast_mode:
        suggestions = []
    else:
        suggestions = [s for s in self.emulator.get_predictions() if s != ""]

    # The leftmost suggestion is the word as typed. The only way to know if
    # the keyboard keeps it or replaces it is to type a space and read the
    # content of the field back.
    self.emulator.type_characters(" ")
    typed_text = self.emulator.get_text()
    self.previous_context = typed_text
    autocorrection = typed_text[len(context) :].strip()

    if not suggestions:
        return [autocorrection]
    if suggestions[0] == autocorrection:
        # The word was kept as typed : the suggestions are already right
        return suggestions

    # The word was auto-corrected : drop the verbatim word, and make sure the
    # auto-correction is among the candidates
    others = suggestions[1:]
    if autocorrection in others:
        return others
    return [autocorrection] + others

`auto_complete(context, keystrokes, partial_word)`

Implementation of auto_complete method for emulated keyboards.

Parameters:

Name	Type	Description	Default
`context`	`str`	String representing the previously typed characters (the beginning of the sentence basically).	required
`keystrokes`	`List[Optional[Tuple[float, float]]]`	List of positions (x and y coordinates) for each keystroke of the word being typed.	required
`partial_word`	`str`	Partial word being typed (corresponding to the keystrokes).	required

Returns:

Type	Description
`List[str]`	The list of completion candidates.

Source code in kebbie/correctors.py

def auto_complete(
    self,
    context: str,
    keystrokes: List[Optional[Tuple[float, float]]],
    partial_word: str,
) -> List[str]:
    """Implementation of `auto_complete` method for emulated keyboards.

    Args:
        context (str): String representing the previously typed characters
            (the beginning of the sentence basically).
        keystrokes (List[Optional[Tuple[float, float]]]): List of positions
            (x and y coordinates) for each keystroke of the word being
            typed. Not used by this implementation.
        partial_word (str): Partial word being typed (corresponding to the
            keystrokes).

    Returns:
        The list of completion candidates (always empty in fast mode).
    """
    if not self.fast_mode:
        # Type the beginning of the word, then read what the keyboard
        # suggests, ignoring the empty suggestions
        self.cached_type(context, partial_word)
        return [suggestion for suggestion in self.emulator.get_predictions() if suggestion != ""]

    # Suggestions are not retrieved in fast mode
    return []

`predict_next_word(context)`

Implementation of predict_next_word method for emulated keyboards.

Parameters:

Name	Type	Description	Default
`context`	`str`	String representing the previously typed characters (the beginning of the sentence basically).	required

Returns:

Type	Description
`List[str]`	The list of next-word candidates.

Source code in kebbie/correctors.py

def predict_next_word(self, context: str) -> List[str]:
    """Implementation of `predict_next_word` method for emulated keyboards.

    Args:
        context (str): String representing the previously typed characters
            (the beginning of the sentence basically). Outside of fast mode,
            it is expected to end with a space.

    Returns:
        The list of next-word candidates (always empty in fast mode).
    """
    if self.fast_mode:
        return []

    # In order to get the predictions, the space should be typed : split the
    # context so that its final space goes through the keyboard
    assert context[-1] == " "
    sentence_start, final_space = context[:-1], " "
    self.cached_type(sentence_start, final_space)

    return [suggestion for suggestion in self.emulator.get_predictions() if suggestion != ""]

`emulator.py`

Module containing the code necessary to interact with the emulators, using Appium.

`Emulator`

Class used to interact with an emulator and type word on a given keyboard.

Parameters:

Name	Type	Description	Default
`platform`	`str`	`android` or `ios`.	required
`keyboard`	`str`	The name of the keyboard installed on the emulator. This is needed because each keyboard has a different layout, and we need to know each key's position in order to type words.	required
`device`	`str`	Device UDID to use.	`None`
`host`	`str`	Appium server's address.	`'127.0.0.1'`
`port`	`str`	Appium server's port.	`'4723'`
`get_layout`	`bool`	If `False`, the keys are not mapped.	`True`

Raises:

Type	Description
`ValueError`	Error raised if the given platform doesn't exist.

Source code in kebbie/emulator.py

class Emulator:
    """Class used to interact with an emulator and type word on a given keyboard.

    Args:
        platform (str): `android` or `ios`.
        keyboard (str): The name of the keyboard installed on the emulator.
            This is needed because each keyboard has a different layout, and we
            need to know each key's position in order to type words.
        device (str, optional): Device UDID to use.
        host (str, optional): Appium server's address.
        port (str, optional): Appium server's port.
        get_layout (bool, optional): Set False to don't map the keys.

    Raises:
        ValueError: Error raised if the given platform doesn't exist.
    """

    def __init__(  # noqa: C901
        self,
        platform: str,
        keyboard: str,
        device: str = None,
        host: str = "127.0.0.1",
        port: str = "4723",
        ios_name: str = None,
        ios_platform: str = None,
        get_layout: bool = True,
    ):
        """Connect to the Appium server, access the typing field and detect
        the layout of the keyboard.

        Args:
            platform (str): `android` or `ios` (case-insensitive).
            keyboard (str): The name of the keyboard installed on the emulator.
            device (str, optional): On Android, UDID of the device to use. On
                iOS, this value is added to 8000 to compute the WDA local
                port, so it has to be a number there.
            host (str, optional): Appium server's address.
            port (str, optional): Appium server's port.
            ios_name (str, optional): Device name, used on iOS only.
            ios_platform (str, optional): Platform version, used on iOS only.
            get_layout (bool, optional): If `False`, the keys are not mapped.

        Raises:
            ValueError: Error raised if the given platform doesn't exist, or
                if the layout is requested for an unknown keyboard.
        """
        super().__init__()

        self.platform = platform.lower()
        if self.platform not in [ANDROID, IOS]:
            raise ValueError(f"Unknown platform : {self.platform}. Please specify `{ANDROID}` or `{IOS}`.")

        # Start appium
        # Work on a copy of the capabilities : they are module-level constants
        # shared by all the instances, a value set for one device (like the
        # `udid`) should not leak into the next instance
        capabilities = dict(ANDROID_CAPABILITIES if self.platform == ANDROID else IOS_CAPABILITIES)
        if self.platform == IOS:
            capabilities["deviceName"] = ios_name
            capabilities["platformVersion"] = ios_platform
            capabilities["wdaLocalPort"] = 8000 + (device if device is not None else 0)
        if self.platform == ANDROID and device is not None:
            capabilities["udid"] = device
        self.driver = webdriver.Remote(f"{host}:{port}", capabilities)
        self.driver.implicitly_wait(20)

        self.screen_size = self.driver.get_window_size()

        self.keyboard = keyboard.lower()

        # Access a typing field
        self.typing_field = None
        self._access_typing_field()

        # Keep track of the keyboard behavior
        # When the typing field is empty, the keyboard is uppercase by default
        self.kb_is_upper = True
        self.last_char_is_space = False
        self.last_char_is_eos = False

        # Set the keyboard as default
        if self.platform == ANDROID:
            self.select_keyboard(keyboard)

        # Get the right layout
        if get_layout:
            if self.keyboard == GBOARD:
                self.detected = GboardLayoutDetector(self.driver, self._tap)
            elif self.keyboard == TAPPA:
                self.detected = TappaLayoutDetector(self.driver, self._tap)
            elif self.keyboard == FLEKSY:
                # Unlike the other detectors, this one only takes the driver
                self.detected = FleksyLayoutDetector(self.driver)
            elif self.keyboard == IOS:
                self.detected = IosLayoutDetector(self.driver, self._tap)
            elif self.keyboard == KBKITPRO:
                self.detected = KbkitproLayoutDetector(self.driver, self._tap)
            elif self.keyboard == KBKITOSS:
                self.detected = KbkitossLayoutDetector(self.driver, self._tap)
            elif self.keyboard == SWIFTKEY:
                self.detected = SwiftkeyLayoutDetector(self.driver, self._tap)
            elif self.keyboard == YANDEX:
                self.detected = YandexLayoutDetector(self.driver, self._tap)
            else:
                raise ValueError(
                    f"Unknown keyboard : {self.keyboard}. Please specify `{GBOARD}`, `{TAPPA}`, `{FLEKSY}`, "
                    f"`{SWIFTKEY}`, `{YANDEX}`, `{KBKITPRO}`, `{KBKITOSS}` or `{IOS}`."
                )
            self.layout = self.detected.layout

        self.typing_field.clear()

    def _access_typing_field(self):
        """Open the application holding the typing field, then focus and
        empty that field so we can type our text in it.

        On Android, the browser pad URL is opened through `adb`, and the page
        is polled until exactly two typing fields are found (the first one is
        used). On iOS, the chat is started and the typing field is retrieved
        by its ID.
        """
        if self.platform != ANDROID:
            self.driver.find_element(By.CLASS_NAME, IOS_START_CHAT_CLASS_NAME).click()
            self.typing_field = self.driver.find_element(By.ID, IOS_TYPING_FIELD_ID)
        else:
            open_pad_command = ["adb", "shell", "am", "start", "-a", "android.intent.action.VIEW", "-d", BROWSER_PAD_URL]
            subprocess.run(open_pad_command, stdout=subprocess.PIPE)

            # Poll until the page shows the two expected fields
            while True:
                candidates = self.driver.find_elements(By.CLASS_NAME, ANDROID_TYPING_FIELD_CLASS_NAME)
                if len(candidates) == 2:
                    break
            self.typing_field = candidates[0]

        self.typing_field.click()
        self.typing_field.clear()

    @staticmethod
    def get_android_devices() -> List[str]:
        """Static method that uses the `adb devices` command to retrieve the
        list of devices running.

        Every device listed by `adb` is returned, whatever the state reported
        next to it.

        Returns:
            List of detected device UDID.
        """
        result = subprocess.run(["adb", "devices"], stdout=subprocess.PIPE)

        devices = []
        for line in result.stdout.decode().splitlines():
            fields = line.split()
            if not fields:
                # Empty or whitespace-only line (a trailing `\r` for example)
                continue
            if line.startswith("List of devices attached"):
                # Header of the listing
                continue
            if fields[0] == "*":
                # Presumably a notice from the adb daemon (`* daemon started
                # successfully`), not a device
                continue
            devices.append(fields[0])
        return devices

    def select_keyboard(self, keyboard):
        """Look for the IME of the requested keyboard among the ones listed
        by `adb`, then enable it and set it as the current one. Android only.

        A warning is printed, and nothing else is done, if the keyboard has
        no entry in `KEYBOARD_PACKAGE`. Nothing is done either if none of the
        listed IME contains the package of the keyboard.

        Args:
            keyboard (str): Keyboard to search.
        """
        if keyboard not in KEYBOARD_PACKAGE:
            print(
                f"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the "
                "keyboard."
            )
            return

        package = KEYBOARD_PACKAGE[keyboard]
        listing = subprocess.check_output(["adb", "shell", "ime", "list", "-s"], universal_newlines=True)

        # Keep the first IME whose identifier contains the package name
        ime_name = next((ime for ime in listing.strip().split("\n") if package in ime), None)
        if not ime_name:
            return

        adb_commands = (
            ["adb", "shell", "settings", "put", "secure", "show_ime_with_hard_keyboard", "1"],
            ["adb", "shell", "ime", "enable", ime_name],
            ["adb", "shell", "ime", "set", ime_name],
        )
        for command in adb_commands:
            subprocess.run(command, stdout=subprocess.PIPE)

    @staticmethod
    def get_ios_devices() -> List[Tuple[str, str]]:
        """Static method that uses the `xcrun simctl` command to retrieve the
        list of booted devices.

        The output is read line by line : `-- <platform> --` lines give the
        platform of the devices listed below them, and only the devices
        marked as `Booted` under an `iOS ...` platform are kept.

        Returns:
            List of booted device platform and device name. The platform is
            given without its `iOS ` prefix.
        """
        devices = []

        result = subprocess.run(["xcrun", "simctl", "list", "devices"], stdout=subprocess.PIPE)
        out = result.stdout.decode().split("\n")

        curr_platform = ""
        for line in out:
            if line.startswith("== ") and line.endswith(" =="):
                # Section title, nothing to extract
                continue
            elif line.startswith("-- ") and line.endswith(" --"):
                # Platform header : strip the `-- ` / ` --` markers
                curr_platform = line[3:-3]
            else:
                # Device line : name, identifier between parentheses, status
                m = re.match(r"\s+([^\t]+)\s+\([A-Z0-9\-]+\)\s+\((Booted|Shutdown)\)", line)
                if m:
                    device_name = m.group(1)
                    status = m.group(2)

                    if status == "Booted" and curr_platform.startswith("iOS "):
                        devices.append((curr_platform[4:], device_name))

        return devices

    def _paste(self, text: str):
        """Replace the content of the typing field with the given text, to
        quickly simulate typing a context, and update the tracked state of
        the keyboard (uppercase, last character) accordingly.

        Args:
            text (str): Text to paste.
        """
        if text == "":
            # Nothing to paste : empty the field, back to the initial state
            self.typing_field.clear()
            self.kb_is_upper = True
            self.last_char_is_space = False
            self.last_char_is_eos = False
            return

        # Note : on Android, pasting content in the field will erase the previous content
        # (which is what we want). On iOS it will not, we need to do it "manually"
        if self.platform == IOS:
            self.typing_field.clear()

        if self.keyboard in (KBKITPRO, KBKITOSS, FLEKSY):
            # In the case of KeyboardKit / Fleksy, after pasting the content, typing a space
            # trigger a punctuation (because previous context may end with a space)
            # To avoid this behavior, break the cycle by typing a backspace
            self._tap(self.layout["lowercase"]["backspace"])

        self.typing_field.send_keys(text)

        ends_with_space = text.endswith(" ")
        # Uppercase is expected after an end-of-sentence marker followed by a space
        self.kb_is_upper = len(text) > 1 and self._is_eos(text[-2]) and ends_with_space
        self.last_char_is_space = ends_with_space
        self.last_char_is_eos = self._is_eos(text[-1])

    def paste(self, text: str):
        """Paste the given text into the typing field, to quickly simulate
        typing a context.

        Thin wrapper around `_paste()` : if the typing field reference is stale,
        the field is accessed again and the paste is attempted a second time.

        Args:
            text (str): Text to paste.
        """
        try:
            self._paste(text)
        except StaleElementReferenceException:
            # The reference to the typing field is outdated : refresh it before retrying
            self._access_typing_field()
        else:
            return

        self._paste(text)

    def type_characters(self, characters: str):  # noqa: C901
        """Type the given sentence on the keyboard, one tap at a time.

        Each character is looked up in the lowercase, uppercase and numbers
        layouts (in this order), the keyboard is switched to the right layer
        if needed, and the key is tapped. Characters that are in none of the
        layouts are silently skipped.

        Args:
            characters (str): The sentence to type.
        """
        for char in characters:
            is_space = char == " "
            # Layer currently displayed by the keyboard
            case = "uppercase" if self.kb_is_upper else "lowercase"

            if is_space:
                if self.last_char_is_space:
                    # Don't type a second space in a row : the keyboard
                    # can transform it into a `.`
                    continue

                self._tap(self.layout[case]["spacebar"])

                # Typing a space after an EOS marker makes the keyboard
                # automatically switch to uppercase
                if self.last_char_is_eos:
                    self.kb_is_upper = True
            elif char in self.layout["lowercase"]:
                if self.kb_is_upper:
                    # Go back to the lowercase layer first
                    self._tap(self.layout["uppercase"]["shift"])
                    if self.keyboard == SWIFTKEY:
                        # Swiftkey needs a double tap, a single one is a capslock
                        self._tap(self.layout["uppercase"]["shift"])
                self._tap(self.layout["lowercase"][char])
            elif char in self.layout["uppercase"]:
                if not self.kb_is_upper:
                    # Go to the uppercase layer first
                    self._tap(self.layout["lowercase"]["shift"])
                # Once the character is typed, the keyboard comes back to lowercase by itself
                self._tap(self.layout["uppercase"][char])
            elif char in self.layout["numbers"]:
                # Number or special character : go through the numbers layer
                self._tap(self.layout[case]["numbers"])
                self._tap(self.layout["numbers"][char])

                if char != "'" or self.keyboard in [GBOARD, SWIFTKEY]:
                    # After typing `'`, the keyboard goes back to the letters by itself
                    # (unless it's GBoard / Swiftkey). Otherwise we have to tap the button
                    self._tap(self.layout["numbers"]["letters"])
            else:
                # This character can't be typed with this layout, skip it
                continue

            # The keyboard is in uppercase only after an EOS marker followed by a space
            self.kb_is_upper = self.last_char_is_eos and is_space

            # Remember what was just typed
            self.last_char_is_eos = self._is_eos(char)
            self.last_char_is_space = is_space

    def _is_eos(self, c: str) -> bool:
        """Tell if the given character is an End-Of-Sentence marker, i.e. a
        character after which (once followed by a space) the keyboard switches
        to uppercase letters by itself.

        For GBoard, no character is considered as an EOS marker.

        Args:
            c (str): Character to check.

        Returns:
            True if the character is an EOS marker.
        """
        return False if self.keyboard == GBOARD else c in (".", "!", "?")

    def _tap(self, frame: List[int], keyboard_frame: List[int] = None):
        """Send a short touch at the center of the given frame.

        Args:
            frame (List[int]): Frame describing the position where to tap,
                relative to the keyboard frame. A frame is :
                [start_pos_x, start_pos_y, width, height].
            keyboard_frame (List[int]): If specified, the Keyboard frame to
                use. If `None`, it will use `self.layout["keyboard_frame"]`.
        """
        key_x, key_y, key_w, key_h = frame
        origin = keyboard_frame if keyboard_frame else self.layout["keyboard_frame"]
        left, top, *_ = origin

        # Absolute position of the center of the frame
        target_x = left + key_x + int(key_w / 2)
        target_y = top + key_y + int(key_h / 2)

        chain = ActionChains(self.driver)
        chain.w3c_actions = ActionBuilder(self.driver, mouse=PointerInput(interaction.POINTER_TOUCH, "touch"))

        # Touch gesture : move, press, hold for a short moment, release
        pointer = chain.w3c_actions.pointer_action
        pointer.move_to_location(target_x, target_y)
        pointer.pointer_down()
        pointer.pause(0.05)
        pointer.release()

        chain.perform()

    def _take_screenshot(self):
        """Capture the full screen of the device.

        Returns:
            The image of the screen, as a copy of the array decoded from the
            PNG screenshot returned by the driver.
        """
        png_bytes = self.driver.get_screenshot_as_png()
        decoded = Image.open(io.BytesIO(png_bytes))
        return np.asarray(decoded).copy()

    def get_predictions(self, lang: str = "en") -> List[str]:
        """Retrieve the predictions displayed by the keyboard.

        Auto-detected keyboards (the ones with a `detected` attribute) provide
        their suggestions directly. For the others, a screenshot is taken and
        each suggestion area is read with OCR.

        Args:
            lang (str): Language to use for the OCR. Note that this argument is
                not read by the current implementation.

        Returns:
            List of predictions from the keyboard.
        """
        if hasattr(self, "detected"):
            # Auto-detected keyboard (using XML tree) : the suggestions
            # can be retrieved directly from the XML tree !
            return self.detected.get_suggestions()

        # Other keyboards still have to use (slow) OCR
        time.sleep(PREDICTION_DELAY)
        full_screen = self._take_screenshot()

        # Keep only the keyboard, because suggestion frames are relative to it
        left, top, width, height = self.layout["keyboard_frame"]
        keyboard_img = full_screen[top : top + height, left : left + width]

        results = []
        for sx, sy, sw, sh in self.layout["suggestions_frames"]:
            raw = pytesseract.image_to_string(keyboard_img[sy : sy + sh, sx : sx + sw], config=TESSERACT_CONFIG)
            cleaned = raw.strip()
            # Remove the characters we don't want in the recognized text
            for unwanted in ("“", '"', "\\"):
                cleaned = cleaned.replace(unwanted, "")
            results.append(cleaned)

        return results

    def _get_text(self) -> str:
        """Return the text currently contained in the typing field.

        Returns:
            Text of the typing field.
        """
        return self.typing_field.text

    def get_text(self) -> str:
        """Return the text currently contained in the typing field.

        Thin wrapper around `_get_text()` : if the typing field reference is
        stale, the field is accessed again and the text is read a second time.

        Returns:
            Text of the typing field.
        """
        try:
            content = self._get_text()
        except StaleElementReferenceException:
            # The reference to the typing field is outdated : refresh it and retry
            self._access_typing_field()
            content = self._get_text()
        return content

    def show_keyboards(self):
        """Debugging tool : display a screenshot of each layer of the keyboard
        (lowercase, uppercase, numbers) with the detected layout drawn on top
        of it, then wait for a key press before closing the windows.
        """
        # Type a character, in order to have some suggestions
        # Keyboard starts with uppercase letter by default (unless GBoard), and
        # automatically go to lowercase after
        if self.keyboard == GBOARD:
            first_key = self.layout["lowercase"]["a"]
        else:
            first_key = self.layout["uppercase"]["A"]
        self._tap(first_key)
        captures = {"lowercase": self._take_screenshot()}

        self._tap(self.layout["lowercase"]["shift"])
        captures["uppercase"] = self._take_screenshot()

        self._tap(self.layout["lowercase"]["numbers"])
        captures["numbers"] = self._take_screenshot()

        for layer, capture in captures.items():
            kb_frame = self.layout["keyboard_frame"]

            # The keyboard frame is absolute, everything else is relative to it
            self._set_area_box(capture, (0, 0), kb_frame, "keyboard frame")
            for idx, suggestion_frame in enumerate(self.layout.get("suggestions_frames", ())):
                self._set_area_box(capture, kb_frame, suggestion_frame, f"suggestion {idx}")
            for key_name, key_frame in self.layout[layer].items():
                self._set_area_box(capture, kb_frame, key_frame, key_name)

            cv2.imshow(layer, capture)

        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def _set_area_box(self, image, base_coords: Tuple[int], coords: Tuple[int], tag: str):
        """Draw a labelled rectangle on the given image (in place), with a
        random color.

        Args:
            image: Image where to add the box.
            base_coords (Tuple[int]): Base coordinates from the full image.
                Only the first two values (x, y) are used.
            coords (Tuple[int]): Coordinates of the element (relative to the
                base coordinates), as well as dimensions : (x, y, w, h).
            tag (str): Tag for this box, written under the rectangle.
        """
        offset_x, offset_y, *_ = base_coords
        rel_x, rel_y, width, height = coords
        left = rel_x + offset_x
        top = rel_y + offset_y

        # Each channel is limited to 200, to ensure the color is dark enough
        color = tuple(random.randint(0, 200) for _ in range(3))

        cv2.rectangle(image, (left, top), (left + width, top + height), color, 2)
        cv2.putText(image, tag, (left, top + height + 17), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

`get_android_devices()`

Static method that uses the adb devices command to retrieve the list of devices running.

Returns:

Type	Description
`List[str]`	List of detected device UDID.

Source code in kebbie/emulator.py

def get_android_devices() -> List[str]:
    """Static method that uses the `adb devices` command to retrieve the
    list of devices running.

    Every line of the output that describes a device is used, whatever the
    state reported for this device. The header line, blank lines (including
    lines made only of whitespace, like the `\\r` left by Windows line
    endings) and adb daemon notices (lines starting with `*`) are ignored.

    Returns:
        List of detected device UDID.
    """
    result = subprocess.run(["adb", "devices"], stdout=subprocess.PIPE)

    devices = []
    for line in result.stdout.decode().splitlines():
        fields = line.split()
        if not fields:
            # Blank or whitespace-only line : there is no UDID to extract
            continue
        if line.startswith("List of devices attached") or line.startswith("*"):
            # Header of the list, or notice from the adb daemon : not a device
            continue
        devices.append(fields[0])
    return devices

`select_keyboard(keyboard)`

Searches the IME of the desired keyboard and selects it, only for Android.

Parameters:

Name	Type	Description	Default
`keyboard`	`str`	Keyboard to search.	required

Source code in kebbie/emulator.py

def select_keyboard(self, keyboard):
    """Look for the IME of the desired keyboard among the ones listed by
    `adb shell ime list -s`, then enable and select it. Only for Android.

    If the keyboard is not listed in `KEYBOARD_PACKAGE`, a warning is printed
    and nothing is done. If no IME matches, nothing is done either.

    Args:
        keyboard (str): Keyboard to search.
    """
    if keyboard not in KEYBOARD_PACKAGE:
        print(
            f"Warning ! {keyboard}'s IME isn't provided (in `KEYBOARD_PACKAGE`), can't automatically select the "
            "keyboard."
        )
        return

    listing = subprocess.check_output(["adb", "shell", "ime", "list", "-s"], universal_newlines=True)

    # Keep the first IME that contains the package name of the keyboard
    candidates = (ime for ime in listing.strip().split("\n") if KEYBOARD_PACKAGE[keyboard] in ime)
    ime_name = next(candidates, None)
    if not ime_name:
        return

    adb_commands = [
        ["adb", "shell", "settings", "put", "secure", "show_ime_with_hard_keyboard", "1"],
        ["adb", "shell", "ime", "enable", ime_name],
        ["adb", "shell", "ime", "set", ime_name],
    ]
    for command in adb_commands:
        subprocess.run(command, stdout=subprocess.PIPE)

`get_ios_devices()`

Static method that uses the xcrun simctl command to retrieve the list of booted devices.

Returns:

Type	Description
`List[Tuple[str, str]]`	List of booted device platform and device name.

Source code in kebbie/emulator.py

def get_ios_devices() -> List[Tuple[str, str]]:
    """List the iOS simulators that are currently booted.

    The output of `xcrun simctl list devices` is parsed line by line :
    a `-- <platform> --` header gives the platform of the devices listed
    after it, and each device line gives the device name and its status.

    Returns:
        One tuple per booted iOS device : the platform (without its `iOS `
        prefix) and the device name.
    """
    simctl = subprocess.run(["xcrun", "simctl", "list", "devices"], stdout=subprocess.PIPE)

    booted = []
    platform = ""
    for row in simctl.stdout.decode().split("\n"):
        if row.startswith("== ") and row.endswith(" =="):
            # Section title, nothing to extract from it
            continue

        if row.startswith("-- ") and row.endswith(" --"):
            # Platform header : remember it for the device lines that follow
            platform = row[3:-3]
            continue

        found = re.match(r"\s+([^\t]+)\s+\([A-Z0-9\-]+\)\s+\((Booted|Shutdown)\)", row)
        if not found:
            continue

        name, state = found.groups()
        if state == "Booted" and platform.startswith("iOS "):
            # Drop the `iOS ` prefix of the platform
            booted.append((platform[4:], name))

    return booted

`paste(text)`

Paste the given text into the typing field, to quickly simulate typing a context.

This method is just a wrapper around _paste(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Parameters:

Name	Type	Description	Default
`text`	`str`	Text to paste.	required

Source code in kebbie/emulator.py

def paste(self, text: str):
    """Paste the given text into the typing field, to quickly simulate
    typing a context.

    Thin wrapper around `_paste()` : if the typing field reference is stale,
    the field is accessed again and the paste is attempted a second time.

    Args:
        text (str): Text to paste.
    """
    try:
        self._paste(text)
    except StaleElementReferenceException:
        # The reference to the typing field is outdated : refresh it before retrying
        self._access_typing_field()
    else:
        return

    self._paste(text)

`type_characters(characters)`

Type the given sentence on the keyboard. For each character, it finds the keys to press and sends a tap on the keyboard.

Parameters:

Name	Type	Description	Default
`characters`	`str`	The sentence to type.	required

Source code in kebbie/emulator.py

def type_characters(self, characters: str):  # noqa: C901
    """Type the given sentence on the keyboard, one tap at a time.

    Each character is looked up in the lowercase, uppercase and numbers
    layouts (in this order), the keyboard is switched to the right layer
    if needed, and the key is tapped. Characters that are in none of the
    layouts are silently skipped.

    Args:
        characters (str): The sentence to type.
    """
    for char in characters:
        is_space = char == " "
        # Layer currently displayed by the keyboard
        case = "uppercase" if self.kb_is_upper else "lowercase"

        if is_space:
            if self.last_char_is_space:
                # Don't type a second space in a row : the keyboard
                # can transform it into a `.`
                continue

            self._tap(self.layout[case]["spacebar"])

            # Typing a space after an EOS marker makes the keyboard
            # automatically switch to uppercase
            if self.last_char_is_eos:
                self.kb_is_upper = True
        elif char in self.layout["lowercase"]:
            if self.kb_is_upper:
                # Go back to the lowercase layer first
                self._tap(self.layout["uppercase"]["shift"])
                if self.keyboard == SWIFTKEY:
                    # Swiftkey needs a double tap, a single one is a capslock
                    self._tap(self.layout["uppercase"]["shift"])
            self._tap(self.layout["lowercase"][char])
        elif char in self.layout["uppercase"]:
            if not self.kb_is_upper:
                # Go to the uppercase layer first
                self._tap(self.layout["lowercase"]["shift"])
            # Once the character is typed, the keyboard comes back to lowercase by itself
            self._tap(self.layout["uppercase"][char])
        elif char in self.layout["numbers"]:
            # Number or special character : go through the numbers layer
            self._tap(self.layout[case]["numbers"])
            self._tap(self.layout["numbers"][char])

            if char != "'" or self.keyboard in [GBOARD, SWIFTKEY]:
                # After typing `'`, the keyboard goes back to the letters by itself
                # (unless it's GBoard / Swiftkey). Otherwise we have to tap the button
                self._tap(self.layout["numbers"]["letters"])
        else:
            # This character can't be typed with this layout, skip it
            continue

        # The keyboard is in uppercase only after an EOS marker followed by a space
        self.kb_is_upper = self.last_char_is_eos and is_space

        # Remember what was just typed
        self.last_char_is_eos = self._is_eos(char)
        self.last_char_is_space = is_space

`get_predictions(lang='en')`

Retrieve the predictions displayed by the keyboard.

Parameters:

Name	Type	Description	Default
`lang`	`str`	Language to use for the OCR.	`'en'`

Returns:

Type	Description
`List[str]`	List of predictions from the keyboard.

Source code in kebbie/emulator.py

def get_predictions(self, lang: str = "en") -> List[str]:
    """Retrieve the predictions displayed by the keyboard.

    Auto-detected keyboards (the ones with a `detected` attribute) provide
    their suggestions directly. For the others, a screenshot is taken and
    each suggestion area is read with OCR.

    Args:
        lang (str): Language to use for the OCR. Note that this argument is
            not read by the current implementation.

    Returns:
        List of predictions from the keyboard.
    """
    if hasattr(self, "detected"):
        # Auto-detected keyboard (using XML tree) : the suggestions
        # can be retrieved directly from the XML tree !
        return self.detected.get_suggestions()

    # Other keyboards still have to use (slow) OCR
    time.sleep(PREDICTION_DELAY)
    full_screen = self._take_screenshot()

    # Keep only the keyboard, because suggestion frames are relative to it
    left, top, width, height = self.layout["keyboard_frame"]
    keyboard_img = full_screen[top : top + height, left : left + width]

    results = []
    for sx, sy, sw, sh in self.layout["suggestions_frames"]:
        raw = pytesseract.image_to_string(keyboard_img[sy : sy + sh, sx : sx + sw], config=TESSERACT_CONFIG)
        cleaned = raw.strip()
        # Remove the characters we don't want in the recognized text
        for unwanted in ("“", '"', "\\"):
            cleaned = cleaned.replace(unwanted, "")
        results.append(cleaned)

    return results

`get_text()`

Return the text currently contained in the typing field.

This method is just a wrapper around _get_text(), making sure the typing field is accessible. If for some reason it is not accessible, it tries to access it and perform the action again.

Returns:

Type	Description
`str`	Text of the typing field.

Source code in kebbie/emulator.py

def get_text(self) -> str:
    """Return the text currently contained in the typing field.

    Thin wrapper around `_get_text()` : if the typing field reference is
    stale, the field is accessed again and the text is read a second time.

    Returns:
        Text of the typing field.
    """
    try:
        content = self._get_text()
    except StaleElementReferenceException:
        # The reference to the typing field is outdated : refresh it and retry
        self._access_typing_field()
        content = self._get_text()
    return content

`show_keyboards()`

Take a screenshot and overlay the given layout, for debugging the position of each key.

Source code in kebbie/emulator.py

def show_keyboards(self):
    """Debugging tool : display a screenshot of each layer of the keyboard
    (lowercase, uppercase, numbers) with the detected layout drawn on top
    of it, then wait for a key press before closing the windows.
    """
    # Type a character, in order to have some suggestions
    # Keyboard starts with uppercase letter by default (unless GBoard), and
    # automatically go to lowercase after
    if self.keyboard == GBOARD:
        first_key = self.layout["lowercase"]["a"]
    else:
        first_key = self.layout["uppercase"]["A"]
    self._tap(first_key)
    captures = {"lowercase": self._take_screenshot()}

    self._tap(self.layout["lowercase"]["shift"])
    captures["uppercase"] = self._take_screenshot()

    self._tap(self.layout["lowercase"]["numbers"])
    captures["numbers"] = self._take_screenshot()

    for layer, capture in captures.items():
        kb_frame = self.layout["keyboard_frame"]

        # The keyboard frame is absolute, everything else is relative to it
        self._set_area_box(capture, (0, 0), kb_frame, "keyboard frame")
        for idx, suggestion_frame in enumerate(self.layout.get("suggestions_frames", ())):
            self._set_area_box(capture, kb_frame, suggestion_frame, f"suggestion {idx}")
        for key_name, key_frame in self.layout[layer].items():
            self._set_area_box(capture, kb_frame, key_frame, key_name)

        cv2.imshow(layer, capture)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

`LayoutDetector`

Base class for auto-detection of the keyboard layout.

To auto-detect a new keyboard, create a new sub-class, and overwrite __init__() and get_suggestions(). Use the existing subclass for GBoard as reference.

Parameters:

Name	Type	Description	Default
`driver`	`Remote`	The Appium driver, used to access elements on the emulator.	required
`tap_fn`	`Callable`	A callback used to tap at specific position on the screen. See `Emulator._tap()`.	required
`xpath_root`	`str`	XPath to the root element of the keyboard.	required
`xpath_keys`	`str`	XPath to detect the keys elements.	required

Source code in kebbie/emulator.py

class LayoutDetector:
    """Base class for auto-detection of the keyboard layout.

    To auto-detect a new keyboard, create a new sub-class, and overwrite
    `__init__()` and `get_suggestions()`. Use the existing subclass for GBoard
    as reference.

    The detection happens at instantiation : the keys of the uppercase,
    lowercase and numbers layers are detected one layer after the other (the
    keyboard is tapped to go from one layer to the next), and the result is
    stored in `self.layout`.

    Args:
        driver (webdriver.Remote): The Appium driver, used to access elements
            on the emulator.
        tap_fn (Callable): A callback used to tap at specific position on the
            screen. See `Emulator._tap()`.
        xpath_root (str): XPath to the root element of the keyboard.
        xpath_keys (str): XPath to detect the keys elements.
        android (bool, optional): `True` if the keyboard runs on Android,
            `False` for iOS. It changes which attributes are read from the
            XML elements.
    """

    def __init__(
        self, driver: webdriver.Remote, tap_fn: Callable, xpath_root: str, xpath_keys: str, android: bool = True
    ):
        self.driver = driver
        self.tap = tap_fn
        self.xpath_root = xpath_root
        self.xpath_keys = xpath_keys
        self.android = android

        layout = {}

        # Get the root element of our keyboard
        root = self.driver.find_element(By.XPATH, self.xpath_root)

        # On empty field, the keyboard is on uppercase
        # So first, retrieve the keyboard frame and uppercase characters
        kb_frame, screen_layout = self._detect_keys(root, current_layout="uppercase")
        layout["keyboard_frame"] = kb_frame
        layout["uppercase"] = screen_layout

        # Then, after typing a letter, the keyboard goes to lowercase automatically
        self.tap(layout["uppercase"]["A"], layout["keyboard_frame"])
        _, screen_layout = self._detect_keys(root, keyboard_frame=layout["keyboard_frame"], current_layout="lowercase")
        layout["lowercase"] = screen_layout

        # Finally, access the symbols keyboard and get characters positions
        self.tap(layout["lowercase"]["numbers"], layout["keyboard_frame"])
        _, screen_layout = self._detect_keys(root, keyboard_frame=layout["keyboard_frame"], current_layout="numbers")
        layout["numbers"] = screen_layout

        # Reset our keyboard to the original layer
        self.tap(layout["numbers"]["letters"], layout["keyboard_frame"])

        self.layout = layout

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        Note that it's slower to access the XML through methods like
        `find_element()`, and it's faster to access the raw XML with
        `self.driver.page_source` and parse it as text directly.

        Raises:
            NotImplementedError: Exception raised if this method is not
                overwritten.

        Returns:
            List of suggestions from the keyboard.
        """
        raise NotImplementedError

    def _detect_keys(
        self, root: WebElement, current_layout: str, keyboard_frame: List[int] = None
    ) -> Tuple[List[int], Dict]:
        """This method detects all keys currently on screen.

        If no keyboard_frame is given, it will also detect the keyboard frame.

        Args:
            root (WebElement): Root element in the XML tree that represents the
                keyboard (with all its keys).
            current_layout (str): Name of the current layout.
            keyboard_frame (List[int], optional): Optionally, the keyboard
                frame (so we don't need to re-detect it everytime).

        Raises:
            ValueError: If the keyboard frame has to be detected, but the
                bounds of the keyboard element can't be parsed.

        Returns:
            Keyboard frame
            Layout with all the keys detected on this screen. The position of
            each key is relative to the keyboard frame.
        """
        layout = {}
        if keyboard_frame is None:
            # On Android the frame comes from the input area, on iOS from the root element itself
            frame_elem = root.find_element(By.ID, "android:id/inputArea") if self.android else root
            keyboard_frame = self._get_frame(frame_elem)
            if keyboard_frame is None:
                # Without the keyboard frame no key can be positioned : fail with an explicit message
                raise ValueError("Can't detect the keyboard frame : the bounds of the keyboard can't be parsed.")

        for key_elem in root.find_elements(By.XPATH, self.xpath_keys):
            label = self._get_label(key_elem, current_layout=current_layout)
            if label is None:
                continue

            frame = self._get_frame(key_elem)
            if frame is None:
                # We don't know where this key is, so it can't be tapped : leave it out of the layout
                continue

            # Make the key position relative to the keyboard frame
            frame[0] -= keyboard_frame[0]
            frame[1] -= keyboard_frame[1]
            layout[label] = frame

        return keyboard_frame, layout

    def _get_frame(self, element: WebElement) -> List[int]:
        """For layout detection, this method returns the bounds of the given
        element.

        On Android the `bounds` attribute (`[x1,y1][x2,y2]`) is parsed, on iOS
        the `rect` attribute (JSON) is read.

        Args:
            element (WebElement): XML Element describing a key.

        Returns:
            Bounds of this key, as [x, y, width, height]. On Android, `None`
            is returned if the `bounds` attribute is missing or can't be
            parsed.
        """
        if self.android:
            # `or ""` : a missing attribute is handled like an attribute we can't parse
            m = re.match(r"\[(\d+),(\d+)\]\[(\d+),(\d+)\]", element.get_attribute("bounds") or "")
            if m is None:
                return None

            x1, y1, x2, y2 = (int(g) for g in m.groups())
            return [x1, y1, x2 - x1, y2 - y1]

        r = json.loads(element.get_attribute("rect"))
        return [r["x"], r["y"], r["width"], r["height"]]

    def _get_label(self, element: WebElement, current_layout: str, is_suggestion: bool = False) -> str:
        """For layout detection, this method returns the content of the given
        element.

        This method returns `None` if it's a key we don't care about. This
        method takes care of translating the content (the name used in the XML
        tree is not the same as the one used in our layout).

        Args:
            element (WebElement): XML Element describing a key.
            current_layout (str): Name of the current layout.
            is_suggestion (bool, optional): If we are retrieving the content of
                a suggestion, the content shouldn't be translated.

        Returns:
            Content of the key, or None if it's a key we should ignore.
        """
        content = element.get_attribute("content-desc") if self.android else element.get_attribute("name")

        if is_suggestion:
            # If we are getting the content of the suggestion, return the content directly
            return content

        if content in CONTENT_TO_IGNORE:
            return None
        elif not self.android and content == "more":
            # On iOS the same `more` key switches between letters and numbers : the name
            # we give it depends on the layer currently displayed
            if current_layout == "uppercase" or current_layout == "lowercase":
                return "numbers"
            else:
                return "letters"
        elif content in CONTENT_TO_RENAME:
            return CONTENT_TO_RENAME[content]
        else:
            return content

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Note that it's slower to access the XML through methods like find_element(), and it's faster to access the raw XML with self.driver.page_source and parse it as text directly.

Raises:

Type	Description
`NotImplementedError`	Exception raised if this method is not overwritten.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Retrieve the suggestions currently displayed by the keyboard, from the
    XML tree. Sub-classes have to provide the implementation.

    Tip for the implementation : reading the raw XML from
    `self.driver.page_source` and parsing it as text is faster than going
    through methods like `find_element()`.

    Raises:
        NotImplementedError: Always raised here, because this method should
            be overwritten.

    Returns:
        List of suggestions from the keyboard.
    """
    raise NotImplementedError()

`GboardLayoutDetector`

Bases: LayoutDetector

Layout detector for the Gboard keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class GboardLayoutDetector(LayoutDetector):
    """Layout detector for the Gboard keyboard. See `LayoutDetector` for more
    information.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args,
            xpath_root=f"./*/*[@package='{KEYBOARD_PACKAGE[GBOARD]}']",
            xpath_keys=".//*[@resource-id][@content-desc]",
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        The raw XML of the page is split on `<android.widget.FrameLayout`, and
        a section is considered as a suggestion if it belongs to Gboard, is
        long-clickable, has a `content-desc` and no `resource-id`.

        Because the content is read from raw XML, it is unescaped before being
        returned (`&amp;` becomes `&`, `&#128512;` becomes the corresponding
        emoji, etc...).

        Returns:
            List of suggestions from the keyboard.
        """
        suggestions = []

        for section in self.driver.page_source.split("<android.widget.FrameLayout"):
            if "com.google.android.inputmethod" not in section:
                # This section doesn't belong to Gboard
                continue
            if "content-desc" not in section or "resource-id" in section or 'long-clickable="true"' not in section:
                # This section is not a suggestion
                continue

            m = re.search(r"content\-desc=\"([^\"]*)\"", section)
            if m is None:
                continue
            content = m.group(1)

            # Deal with emojis : the description is `emoji <entity>`, keep only the entity
            emoji = re.match(r"emoji (&[^;]+;)", content)
            suggestions.append(html.unescape(emoji[1] if emoji else content))

        return suggestions

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    The page source is parsed as raw text : it is split on each
    `<android.widget.FrameLayout`, and a chunk is considered a suggestion if
    it mentions the Gboard package, has a `content-desc`, has no
    `resource-id` and is long-clickable.

    Returns:
        List of suggestions from the keyboard, with XML entities decoded.
    """
    suggestions = []

    for section in self.driver.page_source.split("<android.widget.FrameLayout"):
        if "com.google.android.inputmethod" not in section:
            continue

        looks_like_suggestion = (
            "content-desc" in section and "resource-id" not in section and 'long-clickable="true"' in section
        )
        if not looks_like_suggestion:
            continue

        m = re.search(r"content\-desc=\"([^\"]*)\"", section)
        if not m:
            continue
        content = m.group(1)

        # Emojis are described as "emoji <entity>" : keep only the entity
        emoji = re.match(r"emoji (&[^;]+;)", content)

        # The attribute value comes from raw XML, so it is always decoded (not
        # only for emojis), otherwise a suggestion like `Q&amp;A` would be
        # returned still escaped
        suggestions.append(html.unescape(emoji[1] if emoji else content))

    return suggestions

`IosLayoutDetector`

Bases: LayoutDetector

Layout detector for the iOS default keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class IosLayoutDetector(LayoutDetector):
    """Layout detector for the iOS default keyboard. See `LayoutDetector` for
    more information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to the iOS keyboard (and `android=False`).
        """
        super().__init__(
            *args,
            xpath_root=".//XCUIElementTypeKeyboard",
            xpath_keys="(.//XCUIElementTypeKey|.//XCUIElementTypeButton)",
            android=False,
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        Every named `XCUIElementTypeOther` element found after the one named
        "Typing Predictions" is considered a suggestion.

        Returns:
            List of suggestions from the keyboard.
        """
        marker = "Typing Predictions"

        # Collect the name of each `XCUIElementTypeOther` element, in order
        names = []
        for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
            opening_tag = chunk.split(">")[0]
            if "name=" not in opening_tag:
                continue
            found = re.search(r"name=\"([^\"]*)\"", chunk)
            if found is not None:
                names.append(found.group(1))

        if marker not in names:
            return []

        # Keep what comes after the marker, without the curly quotes
        after_marker = names[names.index(marker) + 1 :]
        return [name.replace("“", "").replace("”", "") for name in after_marker if name != marker]

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    Every named `XCUIElementTypeOther` element found after the one named
    "Typing Predictions" is considered a suggestion.

    Returns:
        List of suggestions from the keyboard.
    """
    marker = "Typing Predictions"

    # Collect the name of each `XCUIElementTypeOther` element, in order
    names = []
    for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
        opening_tag = chunk.split(">")[0]
        if "name=" not in opening_tag:
            continue
        found = re.search(r"name=\"([^\"]*)\"", chunk)
        if found is not None:
            names.append(found.group(1))

    if marker not in names:
        return []

    # Keep what comes after the marker, without the curly quotes
    after_marker = names[names.index(marker) + 1 :]
    return [name.replace("“", "").replace("”", "") for name in after_marker if name != marker]

`KbkitproLayoutDetector`

Bases: LayoutDetector

Layout detector for the KeyboardKit Pro demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class KbkitproLayoutDetector(LayoutDetector):
    """Layout detector for the KeyboardKit Pro demo keyboard. See
    `LayoutDetector` for more information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to KeyboardKit Pro (and `android=False`).
        """
        super().__init__(
            *args,
            xpath_root=".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeTextField]",
            xpath_keys=".//XCUIElementTypeButton",
            android=False,
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        The suggestions are read from the text located before a
        `XCUIElementTypeTextField`, in a chunk that also holds a button and
        an element named "Add".

        Returns:
            List of suggestions from the keyboard.
        """
        found = []

        for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
            before_field, field_tag, _ = chunk.partition("<XCUIElementTypeTextField")
            if not field_tag:
                continue
            if "<XCUIElementTypeButton" not in before_field or 'name="Add"' not in before_field:
                continue

            # The first two ">"-separated parts are skipped. Everything
            # scanned here sits before the text field tag, so there is no
            # need to look for that tag again.
            for tag in before_field.split(">")[2:]:
                match = re.search(r"name=\"([^\"]*)\"", tag)
                if match is not None:
                    found.append(match.group(1).replace("“", "").replace("”", ""))

        return found

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    The suggestions are read from the text located before a
    `XCUIElementTypeTextField`, in a chunk that also holds a button and an
    element named "Add".

    Returns:
        List of suggestions from the keyboard.
    """
    found = []

    for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
        before_field, field_tag, _ = chunk.partition("<XCUIElementTypeTextField")
        if not field_tag:
            continue
        if "<XCUIElementTypeButton" not in before_field or 'name="Add"' not in before_field:
            continue

        # The first two ">"-separated parts are skipped. Everything scanned
        # here sits before the text field tag, so there is no need to look
        # for that tag again.
        for tag in before_field.split(">")[2:]:
            match = re.search(r"name=\"([^\"]*)\"", tag)
            if match is not None:
                found.append(match.group(1).replace("“", "").replace("”", ""))

    return found

`KbkitossLayoutDetector`

Bases: LayoutDetector

Layout detector for the KeyboardKit OSS demo keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class KbkitossLayoutDetector(LayoutDetector):
    """Layout detector for the KeyboardKit OSS demo keyboard. See
    `LayoutDetector` for more information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to KeyboardKit OSS (and `android=False`).
        """
        super().__init__(
            *args,
            xpath_root=".//XCUIElementTypeOther[XCUIElementTypeButton and XCUIElementTypeStaticText]",
            xpath_keys=".//XCUIElementTypeButton",
            android=False,
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        The suggestions are the `name` attributes located before the text
        ", Subtitle" in each `XCUIElementTypeOther` chunk containing it.

        Returns:
            List of suggestions from the keyboard.
        """
        found = []

        for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
            head, separator, _ = chunk.partition(", Subtitle")
            if not separator:
                continue

            # Cutting at ", Subtitle" may truncate the last attribute, which
            # is why the closing quote is optional in the pattern
            for tag in head.split(">")[1:]:
                match = re.search(r"name=\"([^\"]*)\"?", tag)
                if match is not None:
                    found.append(match.group(1).replace("“", "").replace("”", ""))

        return found

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    The suggestions are the `name` attributes located before the text
    ", Subtitle" in each `XCUIElementTypeOther` chunk containing it.

    Returns:
        List of suggestions from the keyboard.
    """
    found = []

    for chunk in self.driver.page_source.split("<XCUIElementTypeOther"):
        head, separator, _ = chunk.partition(", Subtitle")
        if not separator:
            continue

        # Cutting at ", Subtitle" may truncate the last attribute, which is
        # why the closing quote is optional in the pattern
        for tag in head.split(">")[1:]:
            match = re.search(r"name=\"([^\"]*)\"?", tag)
            if match is not None:
                found.append(match.group(1).replace("“", "").replace("”", ""))

    return found

`SwiftkeyLayoutDetector`

Bases: LayoutDetector

Layout detector for the Swiftkey keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class SwiftkeyLayoutDetector(LayoutDetector):
    """Layout detector for the Swiftkey keyboard. See `LayoutDetector` for more
    information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to Swiftkey.
        """
        super().__init__(
            *args,
            xpath_root=f"./*/*[@package='{KEYBOARD_PACKAGE[SWIFTKEY]}']",
            xpath_keys=".//*[@class='android.view.View'][@content-desc]",
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        Only the first `FrameLayout` chunk mentioning Swiftkey and containing
        `android.view.View` elements is inspected.

        Returns:
            List of suggestions from the keyboard.
        """
        chunks = self.driver.page_source.split("<android.widget.FrameLayout")

        # Get the first chunk of raw text that belongs to the keyboard
        keyboard_chunk = next(
            (c for c in chunks if "com.touchtype.swiftkey" in c and "<android.view.View " in c),
            None,
        )
        if keyboard_chunk is None:
            return []

        # What precedes the first view is not a view : skip it
        views = keyboard_chunk.split("<android.view.View ")[1:]
        matches = (re.search(r"content-desc=\"([^\"]*)\"", view) for view in views)
        return [html.unescape(m.group(1)) for m in matches if m]

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    Only the first `FrameLayout` chunk mentioning Swiftkey and containing
    `android.view.View` elements is inspected.

    Returns:
        List of suggestions from the keyboard.
    """
    chunks = self.driver.page_source.split("<android.widget.FrameLayout")

    # Get the first chunk of raw text that belongs to the keyboard
    keyboard_chunk = next(
        (c for c in chunks if "com.touchtype.swiftkey" in c and "<android.view.View " in c),
        None,
    )
    if keyboard_chunk is None:
        return []

    # What precedes the first view is not a view : skip it
    views = keyboard_chunk.split("<android.view.View ")[1:]
    matches = (re.search(r"content-desc=\"([^\"]*)\"", view) for view in views)
    return [html.unescape(m.group(1)) for m in matches if m]

`YandexLayoutDetector`

Bases: LayoutDetector

Layout detector for the Yandex keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class YandexLayoutDetector(LayoutDetector):
    """Layout detector for the Yandex keyboard. See `LayoutDetector` for more
    information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to Yandex.
        """
        super().__init__(
            *args,
            xpath_root=f"./*/*[@package='{KEYBOARD_PACKAGE[YANDEX]}']",
            xpath_keys=".//*[@class='ya.d'][@content-desc]",
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        If the suggestion container cannot be found in the page source (for
        example if the suggestion bar is not displayed), an empty list is
        returned.

        Returns:
            List of suggestions from the keyboard.
        """

        def content_descs(lines) -> List[str]:
            # Extract (and decode) the `content-desc` attribute of each line
            found = []
            for line in lines:
                m = re.search(r"content-desc=\"([^\"]*)\"", line)
                if m:
                    found.append(html.unescape(m.group(1)))
            return found

        # Read the page source a single time, so that the detection of the
        # device type and the parsing are done on the same snapshot
        page = self.driver.page_source

        # Depending if we are on a real device or on emulator, the
        # Yandex keyboard uses different XML tags...
        if "<javaClass" in page:  # Real device
            parts = page.split(f"{KEYBOARD_PACKAGE[YANDEX]}:id/drawable_suggest_container")
            if len(parts) < 2:
                # No suggestion container in the tree
                return []
            section = parts[1].split("</android.view.View>")[0]
            return content_descs(line for line in section.split("\n") if "<javaClass" in line)

        # Emulator
        container_id = f"{KEYBOARD_PACKAGE[YANDEX]}:id/kb_suggest_suggestions_container"
        section = next(
            (chunk for chunk in page.split("android.widget.LinearLayout") if container_id in chunk),
            None,
        )
        if section is None:
            # No suggestion container in the tree
            return []

        slots = ("kb_suggest_left_suggestion", "kb_suggest_center_suggestion", "kb_suggest_right_suggestion")
        return content_descs(line for line in section.split("\n") if any(slot in line for slot in slots))

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    If the suggestion container cannot be found in the page source (for
    example if the suggestion bar is not displayed), an empty list is
    returned.

    Returns:
        List of suggestions from the keyboard.
    """

    def content_descs(lines) -> List[str]:
        # Extract (and decode) the `content-desc` attribute of each line
        found = []
        for line in lines:
            m = re.search(r"content-desc=\"([^\"]*)\"", line)
            if m:
                found.append(html.unescape(m.group(1)))
        return found

    # Read the page source a single time, so that the detection of the device
    # type and the parsing are done on the same snapshot
    page = self.driver.page_source

    # Depending if we are on a real device or on emulator, the
    # Yandex keyboard uses different XML tags...
    if "<javaClass" in page:  # Real device
        parts = page.split(f"{KEYBOARD_PACKAGE[YANDEX]}:id/drawable_suggest_container")
        if len(parts) < 2:
            # No suggestion container in the tree
            return []
        section = parts[1].split("</android.view.View>")[0]
        return content_descs(line for line in section.split("\n") if "<javaClass" in line)

    # Emulator
    container_id = f"{KEYBOARD_PACKAGE[YANDEX]}:id/kb_suggest_suggestions_container"
    section = next(
        (chunk for chunk in page.split("android.widget.LinearLayout") if container_id in chunk),
        None,
    )
    if section is None:
        # No suggestion container in the tree
        return []

    slots = ("kb_suggest_left_suggestion", "kb_suggest_center_suggestion", "kb_suggest_right_suggestion")
    return content_descs(line for line in section.split("\n") if any(slot in line for slot in slots))

`TappaLayoutDetector`

Bases: LayoutDetector

Layout detector for the Tappa keyboard. See LayoutDetector for more information.

Source code in kebbie/emulator.py

class TappaLayoutDetector(LayoutDetector):
    """Layout detector for the Tappa keyboard. See `LayoutDetector` for more
    information.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `LayoutDetector`, adding the XPath
        expressions specific to Tappa.
        """
        super().__init__(
            *args,
            xpath_root=f"./*/*[@package='{KEYBOARD_PACKAGE[TAPPA]}']",
            xpath_keys=".//com.mocha.keyboard.inputmethod.keyboard.Key",
            **kwargs,
        )

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        The suggestions are the `text` attributes of the `TextView` lines
        located between the suggestions strip ID and the next closing
        `LinearLayout` tag.

        Returns:
            List of suggestions from the keyboard.
        """
        page = self.driver.page_source

        # Isolate the raw text of the suggestions strip
        after_strip_id = page.split(f"{KEYBOARD_PACKAGE[TAPPA]}:id/suggestions_strip")[1]
        strip_xml = after_strip_id.split("</android.widget.LinearLayout>")[0]

        text_views = (line for line in strip_xml.split("\n") if "<android.widget.TextView" in line)
        matches = (re.search(r"text=\"([^\"]*)\"", line) for line in text_views)
        return [html.unescape(m.group(1)) for m in matches if m]

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    The suggestions are the `text` attributes of the `TextView` lines located
    between the suggestions strip ID and the next closing `LinearLayout` tag.

    Returns:
        List of suggestions from the keyboard.
    """
    page = self.driver.page_source

    # Isolate the raw text of the suggestions strip
    after_strip_id = page.split(f"{KEYBOARD_PACKAGE[TAPPA]}:id/suggestions_strip")[1]
    strip_xml = after_strip_id.split("</android.widget.LinearLayout>")[0]

    text_views = (line for line in strip_xml.split("\n") if "<android.widget.TextView" in line)
    matches = (re.search(r"text=\"([^\"]*)\"", line) for line in text_views)
    return [html.unescape(m.group(1)) for m in matches if m]

`FleksyLayoutDetector`

Bases: LayoutDetector

Layout detector for the Fleksy keyboard. See LayoutDetector for more information.

Note that this detector is only semi-automatic: the layout itself is not detected (it is hard-coded for now), but the suggestions are retrieved from the XML tree (no need to rely on OCR, much faster).

Source code in kebbie/emulator.py

class FleksyLayoutDetector(LayoutDetector):
    """Layout detector for the Fleksy keyboard. See `LayoutDetector` for more
    information.

    Note that this detector is only semi-automatic : the layout itself is not
    detected (it is hard-coded for now), but the suggestions are retrieved
    from the XML tree (no need to rely on OCR, much faster).
    """

    def __init__(self, driver: webdriver.Remote):
        """Store the driver and build the layout from the hard-coded
        `FLEKSY_LAYOUT`.

        Args:
            driver (webdriver.Remote): Driver used to access the page source.
        """
        self.driver = driver

        # Adapt the layout to the screen : each key frame is scaled by the
        # 3rd and 4th values of the keyboard frame
        # (presumably its width and height - to be confirmed)
        frame = FLEKSY_LAYOUT["keyboard_frame"]
        frame_w, frame_h = frame[2], frame[3]

        self.layout = {"keyboard_frame": frame}
        for layer in ("lowercase", "uppercase", "numbers"):
            scaled_keys = {
                key: [
                    int(rel[0] * frame_w),
                    int(rel[1] * frame_h),
                    int(rel[2] * frame_w),
                    int(rel[3] * frame_h),
                ]
                for key, rel in FLEKSY_LAYOUT[layer].items()
            }
            # A layer without any key is not added to the layout
            if scaled_keys:
                self.layout[layer] = scaled_keys

    def get_suggestions(self) -> List[str]:
        """Method to retrieve the keyboard suggestions from the XML tree.

        Returns:
            List of suggestions from the keyboard.
        """
        found = []

        # Go through the raw content as text, weeding out the chunks that
        # contain a button or that do not contain any static text
        for chunk in self.driver.page_source.split("XCUIElementTypeOther"):
            if "XCUIElementTypeButton" in chunk or "XCUIElementTypeStaticText" not in chunk:
                continue

            match = re.search(r"name=\"([^\"]*)\"", chunk)
            if match is not None:
                found.append(html.unescape(match.group(1)))

        return found

`get_suggestions()`

Method to retrieve the keyboard suggestions from the XML tree.

Returns:

Type	Description
`List[str]`	List of suggestions from the keyboard.

Source code in kebbie/emulator.py

def get_suggestions(self) -> List[str]:
    """Method to retrieve the keyboard suggestions from the XML tree.

    Returns:
        List of suggestions from the keyboard.
    """
    found = []

    # Go through the raw content as text, weeding out the chunks that contain
    # a button or that do not contain any static text
    for chunk in self.driver.page_source.split("XCUIElementTypeOther"):
        if "XCUIElementTypeButton" in chunk or "XCUIElementTypeStaticText" not in chunk:
            continue

        match = re.search(r"name=\"([^\"]*)\"", chunk)
        if match is not None:
            found.append(html.unescape(match.group(1)))

    return found

`gesture.py`

Module containing the function make_swipe_gesture, which is used to create a natural-looking swipe gesture from a list of letter-points.

`make_swipe_gesture(control_points)`

Function to generate an artificial swipe gesture from a list of points. The given points represent the typed letters on the keyboard. This function simply generates several other points between the control points. Points are generated using sequential Bezier curves. The resulting swipe gesture passes through the control points.

Parameters:

Name	Type	Description	Default
`control_points`	`List[Tuple[float, float]]`	Control points, representing the letter typed. The resulting swipe gesture will pass by these points.	required

Returns:

Type	Description
`List[Tuple[float, float]]`	Points generated by the swipe gesture.

Source code in kebbie/gesture.py

def make_swipe_gesture(control_points: List[Tuple[float, float]]) -> List[Tuple[float, float]]:
    """Generate an artificial swipe gesture from a list of points.

    The given points represent the letters typed on the keyboard. Additional
    points are generated between each pair of consecutive control points,
    using one Bezier curve per pair. The resulting swipe gesture passes
    through every control point.

    Args:
        control_points (List[Tuple[float, float]]): Control points,
            representing the letters typed. The resulting swipe gesture will
            pass through these points.

    Returns:
        Points generated by the swipe gesture.
    """
    path = [control_points[0]]

    # The "style" of the gesture (speed & acceleration) is drawn once, and
    # shared by all the segments
    points_per_dist = random.uniform(MIN_N_POINTS_PER_DIST, MAX_N_POINTS_PER_DIST)
    accel = random.uniform(MIN_ACCELERATION, MAX_ACCELERATION)

    # One bezier curve per pair of consecutive control points
    for start, end in zip(control_points, control_points[1:]):
        # The longer the segment, the more points are sampled, and the further
        # away the curve is allowed to go (up to a limit)
        dist = euclidian_dist(start, end)
        samples = accelerated_linspace(max(1, int(dist * points_per_dist)), accel)
        spread = min(dist, MAX_RADIUS)

        # Random points around both ends, to avoid a straight line between
        # the control points
        start_pull = random_point_around(start, radius=spread)
        end_pull = random_point_around(end, radius=spread)

        # The points are given in reverse order, from the end to the start
        xs, ys = bezier_curve([end, end_pull, start_pull, start], linspace=samples)
        segment = list(zip(xs, ys))

        # The segment should finish exactly on the control point
        if segment[-1] != end:
            segment.append(end)
        # The start of the segment is already the last point of the path
        if segment[0] == start:
            del segment[0]

        path.extend(segment)

    return path

`random_point_around(p, radius)`

Generate a random point around the given point p, within the given radius.

Parameters:

Name	Type	Description	Default
`p`	`Tuple[float, float]`	Coordinates to use as a starting point.	required
`radius`	`float`	Radius within the starting point to generate the random point.	required

Returns:

Type	Description
`Tuple[float, float]`	Coordinates of the generated random point.

Source code in kebbie/gesture.py

def random_point_around(p: Tuple[float, float], radius: float) -> Tuple[float, float]:
    """Generate a random point around the given point p.

    Each coordinate is drawn independently and uniformly, at most `radius`
    away from the corresponding coordinate of p.

    Args:
        p (Tuple[float, float]): Coordinates to use as a starting point.
        radius (float): Maximum offset applied to each coordinate of the
            starting point.

    Returns:
        Coordinates of the generated random point.
    """

    def jitter(coord: float) -> float:
        return random.uniform(coord - radius, coord + radius)

    # x is drawn before y
    return (jitter(p[0]), jitter(p[1]))

`bernstein_poly(i, n, t)`

The Bernstein polynomial of n, i as a function of t.

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name	Type	Description	Default
`i`	`int`	i	required
`n`	`int`	n	required
`t`	`float`	t	required

Returns:

Type	Description
`float`	The computed value for this polynomial function.

Source code in kebbie/gesture.py

def bernstein_poly(i: int, n: int, t: float) -> float:
    """The Bernstein polynomial of n, i as a function of t.

    Computed as `comb(n, i) * t^(n - i) * (1 - t)^i`.

    Taken from : https://stackoverflow.com/a/12644499/9494790

    Args:
        i (int): Index of the polynomial.
        n (int): Degree of the polynomial.
        t (float): Value at which the polynomial is evaluated.

    Returns:
        The computed value for this polynomial function.
    """
    binomial = comb(n, i)
    t_term = t ** (n - i)
    one_minus_t_term = (1 - t) ** i
    return binomial * t_term * one_minus_t_term

`bezier_curve(control_points, linspace)`

Given a set of control points, return the bezier curve defined by the control points.

See : http://processingjs.nihongoresources.com/bezierinfo/

Taken from : https://stackoverflow.com/a/12644499/9494790

Parameters:

Name	Type	Description	Default
`control_points`	`List[Tuple[float, float]]`	Control points used to generate the bezier curve.	required
`linspace`	`List[float]`	Linspace to use for sampling points across the Bezier curve.	required

Returns:

Type	Description
`Tuple[List[float], List[float]]`	Sampled points along the bezier curve.

Source code in kebbie/gesture.py

def bezier_curve(control_points: List[Tuple[float, float]], linspace: List[float]) -> Tuple[List[float], List[float]]:
    """Return the bezier curve defined by the given control points, sampled
    at the given positions.

    See : http://processingjs.nihongoresources.com/bezierinfo/

    Taken from : https://stackoverflow.com/a/12644499/9494790

    Args:
        control_points (List[Tuple[float, float]]): Control points used to
            generate the bezier curve.
        linspace (List[float]): Linspace to use for sampling points across the
            Bezier curve.

    Returns:
        Sampled points along the bezier curve, as a pair : the x values and
        the y values.
    """
    degree = len(control_points) - 1

    # One row of weights per control point
    basis = np.array([bernstein_poly(k, degree, linspace) for k in range(degree + 1)])

    xs = np.array([point[0] for point in control_points])
    ys = np.array([point[1] for point in control_points])

    # Each sampled point is a weighted sum of the control points
    return np.dot(xs, basis), np.dot(ys, basis)

`accelerated_linspace(n, acceleration)`

Alternative to np.linspace: instead of returning evenly distributed numbers, the values are unevenly distributed, simulating an acceleration at first, and then a deceleration.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of points to generate in the linspace.	required
`acceleration`	`float`	A number that dictates how constant the acceleration is. The lower, the more S-shaped the curve is.	required

Returns:

Type	Description
`List[float]`	Generated points.

Source code in kebbie/gesture.py

def accelerated_linspace(n: int, acceleration: float) -> List[float]:
    """Alternative to np.linspace : instead of returning evenly distributed
    numbers, the values are unevenly distributed, simulating an acceleration
    at first, and then a deceleration.

    The generated values always lie between 0 and 1 : for `n >= 2` the first
    value is 0 and the last one is 1, and for `n == 1` the single value is 0.

    Args:
        n (int): Number of points to generate in the linspace.
        acceleration (float): A number that dictates how constant the
            acceleration is. The lower, the more S-shaped the curve is.

    Returns:
        Generated points (as a numpy array).
    """
    if n <= 1:
        # A single value can't be normalized (it would be a division by zero).
        # Stay in the same [0, 1] range as the general case, instead of
        # leaking the -1 of the internal [-1, 1] range.
        return np.linspace(0.0, 1.0, n)

    # Sigmoid over [-1, 1] : the steps are small near both ends and larger
    # in the middle
    s_curve = 1 / (1 + np.exp(-np.linspace(-1.0, 1.0, n) / acceleration))

    # Rescale so that the values span exactly [0, 1]
    lowest = s_curve.min()
    return (s_curve - lowest) / (s_curve.max() - lowest)

`layout.py`

Module containing the helper LayoutHelper, a useful class to deal with the layout of a keyboard, access key positions, etc...

`KeyInfo` `dataclass`

Structure containing all information needed for a given character (key).

Parameters:

Name	Type	Description	Default
`klayer_id`	`int`	Keyboard Layer ID where this key is located.	required
`width`	`float`	Width of the key.	required
`height`	`float`	Height of the key.	required
`center`	`Tuple[float, float]`	Center position (x, y coordinates) of the key.	required

Source code in kebbie/layout.py

@dataclass
class KeyInfo:
    """Structure containing all information needed for a given character (key).

    This is a plain record : it only declares the fields below.

    Args:
        klayer_id (int): Keyboard Layer ID where this key is located.
        width (float): Width of the key.
        height (float): Height of the key.
        center (Tuple[float, float]): Center position (x, y coordinates) of the
            key.
    """

    # ID of the keyboard layer where the key is located
    klayer_id: int
    # Dimensions of the key
    width: float
    height: float
    # (x, y) coordinates of the center of the key
    center: Tuple[float, float]

`Key` `dataclass`

Structure containing information needed for each key of a given keyboard layer.

Parameters:

Name	Type	Description	Default
`char`	`str`	Character associated with this key.	required
`bounds`	`Dict[str, float]`	Dictionary representing the bounding box of the key. The dictionary should contain the following keys : `right`, `left`, `top`, `bottom`.	required

Source code in kebbie/layout.py

@dataclass
class Key:
    """Structure containing information needed for each key of a given keyboard
    layer.

    This is a plain record : it only declares the fields below.

    Args:
        char (str): Character associated with this key.
        bounds (Dict[str, float]): Dictionary representing the bounding box of
            the key. The dictionary should contain the following keys :
            `right`, `left`, `top`, `bottom`.
    """

    # Character associated with the key
    char: str
    # Bounding box of the key, expected keys : `right`, `left`, `top`, `bottom`
    bounds: Dict[str, float]

`LayoutHelper`

Small class that represents a keyboard layout. The goal of this class is to offer some easy-to-use methods to deal with a keyboard layout.

Parameters:

Name	Type	Description	Default
`lang`	`str`	Language of the layout to load.	`'en-US'`
`custom_keyboard`	`Dict`	If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.	`None`
`ignore_layers_after`	`Optional[int]`	Ignore higher layers of the keyboard layout. If `None` is given, no layer is ignored.	`None`

Source code in kebbie/layout.py

class LayoutHelper:
    """Convenience wrapper around a keyboard layout.

    The layout is indexed both by character and by keyboard layer, so callers
    can go from a character to its key, or from a position to a character.

    Args:
        lang (str, optional): Language of the layout to load.
        custom_keyboard (Dict, optional): Keyboard layout to use in place of
            the default layout of the given language.
        ignore_layers_after (Optional[int]) : Layers with an ID greater than
            this value are skipped. If `None` is given, no layer is ignored.
    """

    def __init__(self, lang: str = "en-US", custom_keyboard: Dict = None, ignore_layers_after: Optional[int] = None):
        # A custom keyboard, when given, takes precedence over the default one
        if custom_keyboard is None:
            keyboard = load_keyboard(lang)
        else:
            keyboard = custom_keyboard

        extracted = self._extract_infos(keyboard["layout"], ignore_layers_after)
        self.keys_info, self.klayers_info, self.accents = extracted
        self.letter_accents = [accent for accent in self.accents if re.match(r"^[\pL]+$", accent)]
        self.spelling_symbols = keyboard["settings"]["allowed_symbols_in_words"]
        self.layout_name = keyboard["keyboard"]["default-layout"]

    def _extract_infos(  # noqa: C901
        self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None
    ) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:
        """Walk through the keyboard layout and build the lookup structures
        used by the other methods. This is meant to be called from the
        constructor only.

        Note:
            In the layout, accents are listed on the same button as their
            non-accented character. Because this class may be used to
            generate typing noise, each accent gets its own (virtual) key :
            for every button with accents, a new "virtual keyboard layer" is
            created, where the accents of this button are laid out next to
            each other.

        Args:
            keyboard_layout (Dict): Dictionary representing the keyboard and
                its layout.
            ignore_layers_after (Optional[int]) : Layers with an ID greater
                than this value are skipped. If `None` is given, no layer is
                ignored.

        Returns:
            Key information for each character in the keyboard.
            Keys contained in each layer of the keyboard.
            Sorted list of accents used in the keyboard.
        """
        char_to_info = {}  # char -> KeyInfo (size, center, klayer ID)
        layer_to_keys = defaultdict(list)  # klayer ID -> list of Key (char, bounds)
        accent_set = set()

        # Virtual layers are numbered right after the layers of the layout
        next_virtual_id = len(keyboard_layout)
        for klayer in keyboard_layout:
            if klayer["buttons"] is None:
                continue
            if ignore_layers_after is not None and klayer["id"] > ignore_layers_after:
                continue

            for button in klayer["buttons"]:
                # First label is the character, the following ones are its accents
                labels = button["labels"]
                char = labels[0]
                accents = labels[1:]

                # Among special buttons, only space and point are kept
                if button["type"] != 1:
                    if char.lower() == SPACE:
                        char = " "
                    elif char != POINT:
                        continue

                if char is None:
                    continue

                layer_id = klayer["id"]
                rect = button["boundingRect"]

                # The character is registered in the lowest layer where it appears
                known = char_to_info.get(char)
                if known is None or known.klayer_id > layer_id:
                    char_to_info[char] = KeyInfo(
                        layer_id,
                        rect["right"] - rect["left"],
                        rect["bottom"] - rect["top"],
                        (button["centerPoint"]["x"], button["centerPoint"]["y"]),
                    )
                # The layer itself always lists the key
                layer_to_keys[layer_id].append(Key(char, rect))

                # Each accent gets a virtual key in the virtual layer of this button
                for position, accent in enumerate(accents):
                    accent_set.add(accent)
                    virtual_rect, virtual_center = self._make_virtual_key(position, rect)

                    # Only the first occurrence of an accent is registered
                    if accent not in char_to_info:
                        char_to_info[accent] = KeyInfo(
                            next_virtual_id,
                            virtual_rect["right"] - virtual_rect["left"],
                            virtual_rect["bottom"] - virtual_rect["top"],
                            (virtual_center["x"], virtual_center["y"]),
                        )
                    # The virtual layer itself always lists the key
                    layer_to_keys[next_virtual_id].append(Key(accent, virtual_rect))

                # A virtual layer was used for this button : the next one needs a new ID
                if accents:
                    next_virtual_id += 1

        return char_to_info, layer_to_keys, sorted(accent_set)

    def _make_virtual_key(
        self, idx: int, initial_bounds: Dict[str, float]
    ) -> Tuple[Dict[str, float], Dict[str, float]]:
        """Build the bounding box of the virtual key of an accented character.

        The virtual key has the same size as the non-accented key. Accents are
        laid out in a grid of `N_ACCENT_PER_LINE` columns : the first accent
        takes the place of the non-accented key, the following ones go to its
        right, and each new line is placed one key height above the previous
        one.

        Args:
            idx (int): Index of the accent for which to generate a key.
            initial_bounds (Dict[str, float]): The bounding box of the
                non-accented key.

        Returns:
            Generated bounding box.
            Its associated center position.
        """
        width = initial_bounds["right"] - initial_bounds["left"]
        height = initial_bounds["bottom"] - initial_bounds["top"]

        # Position of the accent in the grid
        row, column = divmod(idx, N_ACCENT_PER_LINE)
        left = initial_bounds["left"] + column * width
        bottom = initial_bounds["bottom"] - row * height
        top = bottom - height

        bounds = {
            "bottom": bottom,
            "left": left,
            "right": left + width,
            "top": top,
        }
        center = {
            "x": left + width / 2,
            "y": top + height / 2,
        }
        return bounds, center

    def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:
        """Look up the key used to type the given character.

        Args:
            char (str): Character for which to retrieve key information.

        Raises:
            KeyError: Exception raised if the given character is not known
                from this keyboard layout.

        Returns:
            Width of the key for the requested character.
            Height of the key for the requested character.
            Center position (x-axis) of the key for the requested character.
            Center position (y-axis) of the key for the requested character.
            Keyboard layer ID where the character's key is located.
        """
        info = self.keys_info[char]
        center_x, center_y = info.center
        return info.width, info.height, center_x, center_y, info.klayer_id

    def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:
        """Find which character is typed when touching the given position.

        Args:
            pos (Tuple[float, float]): Position (x, y) in the keyboard.
            klayer_id (int): Keyboard layer ID to use.

        Returns:
            Character of the first key containing the position, or of the key
            with the closest center if no key contains the position.
        """
        klayer = self.klayers_info[klayer_id]

        def _contains_pos(key: Key) -> bool:
            box = key.bounds
            return box["left"] <= pos[0] <= box["right"] and box["top"] <= pos[1] <= box["bottom"]

        def _dist_to_center(key: Key) -> float:
            box = key.bounds
            middle = (
                box["left"] + (box["right"] - box["left"]) / 2,
                box["top"] + (box["bottom"] - box["top"]) / 2,
            )
            return euclidian_dist(pos, middle)

        # First key (in layer order) whose bounding box contains the position
        for candidate in klayer:
            if _contains_pos(candidate):
                return candidate.char

        # The position is outside of every key -> fall back to the closest key
        return min(klayer, key=_dist_to_center).char

`_extract_infos(keyboard_layout, ignore_layers_after=None)`

This method reads the given keyboard layout, and extract useful data structures from this (to be used later by other methods). This basically builds the LayoutHelper class (and should be used only inside the constructor).

Note

The given keyboard layout contains 24 layers. Each key appears in one (or several) layers of the keyboard. Accents are associated with the same key as their non-accented version. This class may be used to generate typing noise, so accents should have their own keys (and closer accents should be represented by closer keys). This method takes care of it by generating "virtual keyboard layers" for each group of accents. The goal is to generate a virtual keyboard layer that is as close as possible to the actual keyboard used by real users.

Parameters:

Name	Type	Description	Default
`keyboard_layout`	`Dict`	Dictionary representing the keyboard and its layout.	required
`ignore_layers_after`	`Optional[int]`	Ignore higher layers of the keyboard layout. If `None` is given, no layer is ignored.	`None`

Returns:

Type	Description
`Dict[str, KeyInfo]`	Key information for each character in the keyboard.
`Dict[int, Key]`	Key information for each layer of the keyboard.
`List[str]`	List of accents used in the keyboard.

Source code in kebbie/layout.py

def _extract_infos(  # noqa: C901
    self, keyboard_layout: Dict, ignore_layers_after: Optional[int] = None
) -> Tuple[Dict[str, KeyInfo], Dict[int, Key], List[str]]:
    """Walk through the keyboard layout and build the lookup structures
    used by the other methods. This is meant to be called from the
    constructor only.

    Note:
        In the layout, accents are listed on the same button as their
        non-accented character. Because this class may be used to
        generate typing noise, each accent gets its own (virtual) key :
        for every button with accents, a new "virtual keyboard layer" is
        created, where the accents of this button are laid out next to
        each other.

    Args:
        keyboard_layout (Dict): Dictionary representing the keyboard and
            its layout.
        ignore_layers_after (Optional[int]) : Layers with an ID greater
            than this value are skipped. If `None` is given, no layer is
            ignored.

    Returns:
        Key information for each character in the keyboard.
        Keys contained in each layer of the keyboard.
        Sorted list of accents used in the keyboard.
    """
    char_to_info = {}  # char -> KeyInfo (size, center, klayer ID)
    layer_to_keys = defaultdict(list)  # klayer ID -> list of Key (char, bounds)
    accent_set = set()

    # Virtual layers are numbered right after the layers of the layout
    next_virtual_id = len(keyboard_layout)
    for klayer in keyboard_layout:
        if klayer["buttons"] is None:
            continue
        if ignore_layers_after is not None and klayer["id"] > ignore_layers_after:
            continue

        for button in klayer["buttons"]:
            # First label is the character, the following ones are its accents
            labels = button["labels"]
            char = labels[0]
            accents = labels[1:]

            # Among special buttons, only space and point are kept
            if button["type"] != 1:
                if char.lower() == SPACE:
                    char = " "
                elif char != POINT:
                    continue

            if char is None:
                continue

            layer_id = klayer["id"]
            rect = button["boundingRect"]

            # The character is registered in the lowest layer where it appears
            known = char_to_info.get(char)
            if known is None or known.klayer_id > layer_id:
                char_to_info[char] = KeyInfo(
                    layer_id,
                    rect["right"] - rect["left"],
                    rect["bottom"] - rect["top"],
                    (button["centerPoint"]["x"], button["centerPoint"]["y"]),
                )
            # The layer itself always lists the key
            layer_to_keys[layer_id].append(Key(char, rect))

            # Each accent gets a virtual key in the virtual layer of this button
            for position, accent in enumerate(accents):
                accent_set.add(accent)
                virtual_rect, virtual_center = self._make_virtual_key(position, rect)

                # Only the first occurrence of an accent is registered
                if accent not in char_to_info:
                    char_to_info[accent] = KeyInfo(
                        next_virtual_id,
                        virtual_rect["right"] - virtual_rect["left"],
                        virtual_rect["bottom"] - virtual_rect["top"],
                        (virtual_center["x"], virtual_center["y"]),
                    )
                # The virtual layer itself always lists the key
                layer_to_keys[next_virtual_id].append(Key(accent, virtual_rect))

            # A virtual layer was used for this button : the next one needs a new ID
            if accents:
                next_virtual_id += 1

    return char_to_info, layer_to_keys, sorted(accent_set)

`_make_virtual_key(idx, initial_bounds)`

Method to create a new boundary for an accented character. Based on the given id, the generated boundary box will be generated at a different position.

This method tries to follow a similar pattern as the sample app, with accents appearing in lines of 4 accents.

Parameters:

Name	Type	Description	Default
`idx`	`int`	The index of the bounding box to generate.	required
`initial_bounds`	`Dict[str, float]`	The bounding box of the non-accented key.	required

Returns:

Type	Description
`Dict[str, float]`	Generated bounding box.
`Dict[str, float]`	Its associated center position.

Source code in kebbie/layout.py

def _make_virtual_key(
    self, idx: int, initial_bounds: Dict[str, float]
) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Build the bounding box of the virtual key of an accented character.

    The virtual key has the same size as the non-accented key. Accents are
    laid out in a grid of `N_ACCENT_PER_LINE` columns : the first accent
    takes the place of the non-accented key, the following ones go to its
    right, and each new line is placed one key height above the previous
    one.

    Args:
        idx (int): Index of the accent for which to generate a key.
        initial_bounds (Dict[str, float]): The bounding box of the
            non-accented key.

    Returns:
        Generated bounding box.
        Its associated center position.
    """
    width = initial_bounds["right"] - initial_bounds["left"]
    height = initial_bounds["bottom"] - initial_bounds["top"]

    # Position of the accent in the grid
    row, column = divmod(idx, N_ACCENT_PER_LINE)
    left = initial_bounds["left"] + column * width
    bottom = initial_bounds["bottom"] - row * height
    top = bottom - height

    bounds = {
        "bottom": bottom,
        "left": left,
        "right": left + width,
        "top": top,
    }
    center = {
        "x": left + width / 2,
        "y": top + height / 2,
    }
    return bounds, center

`get_key_info(char)`

Method to retrieve the information associated to a specific key.

Parameters:

Name	Type	Description	Default
`char`	`str`	Character for which to retrieve key information.	required

Raises:

Type	Description
`KeyError`	Exception raised if the given character can't be typed (because it doesn't exist on this keyboard layout).

Returns:

Type	Description
`float`	Width of the key for the requested character.
`float`	Height of the key for the requested character.
`float`	Center position (x-axis) of the key for the requested character.
`float`	Center position (y-axis) of the key for the requested character.
`int`	Keyboard layer ID where the character's key is located.

Source code in kebbie/layout.py

def get_key_info(self, char: str) -> Tuple[float, float, float, float, int]:
    """Look up the key used to type the given character.

    Args:
        char (str): Character for which to retrieve key information.

    Raises:
        KeyError: Exception raised if the given character is not known
            from this keyboard layout.

    Returns:
        Width of the key for the requested character.
        Height of the key for the requested character.
        Center position (x-axis) of the key for the requested character.
        Center position (y-axis) of the key for the requested character.
        Keyboard layer ID where the character's key is located.
    """
    info = self.keys_info[char]
    center_x, center_y = info.center
    return info.width, info.height, center_x, center_y, info.klayer_id

`get_key(pos, klayer_id)`

Get the character associated with the given position.

Parameters:

Name	Type	Description	Default
`pos`	`Tuple[float, float]`	Position (x, y) in the keyboard.	required
`klayer_id`	`int`	Keyboard layer ID to use.	required

Returns:

Type	Description
`str`	Character associated to the given position.

Source code in kebbie/layout.py

def get_key(self, pos: Tuple[float, float], klayer_id: int) -> str:
    """Find which character is typed when touching the given position.

    Args:
        pos (Tuple[float, float]): Position (x, y) in the keyboard.
        klayer_id (int): Keyboard layer ID to use.

    Returns:
        Character of the first key containing the position, or of the key
        with the closest center if no key contains the position.
    """
    klayer = self.klayers_info[klayer_id]

    def _contains_pos(key) -> bool:
        box = key.bounds
        return box["left"] <= pos[0] <= box["right"] and box["top"] <= pos[1] <= box["bottom"]

    def _dist_to_center(key) -> float:
        box = key.bounds
        middle = (
            box["left"] + (box["right"] - box["left"]) / 2,
            box["top"] + (box["bottom"] - box["top"]) / 2,
        )
        return euclidian_dist(pos, middle)

    # First key (in layer order) whose bounding box contains the position
    for candidate in klayer:
        if _contains_pos(candidate):
            return candidate.char

    # The position is outside of every key -> fall back to the closest key
    return min(klayer, key=_dist_to_center).char

`noise_model.py`

Module defining the NoiseModel class, which takes care of introducing typos in a clean text (and later see if the model can properly correct these typos).

`Typo`

Bases: Enum

Enum listing all possible typos that can be introduced.

Source code in kebbie/noise_model.py

class Typo(Enum):
    """Enumeration of every kind of typo the noise model can introduce."""

    # A character that should have been typed is missing
    DELETE_SPELLING_SYMBOL = "DELETE_SPELLING_SYMBOL"
    DELETE_SPACE = "DELETE_SPACE"
    DELETE_PUNCTUATION = "DELETE_PUNCTUATION"
    DELETE_CHAR = "DELETE_CHAR"

    # An extra character was typed
    ADD_SPELLING_SYMBOL = "ADD_SPELLING_SYMBOL"
    ADD_SPACE = "ADD_SPACE"
    ADD_PUNCTUATION = "ADD_PUNCTUATION"
    ADD_CHAR = "ADD_CHAR"

    # A character was replaced by another one
    SUBSTITUTE_CHAR = "SUBSTITUTE_CHAR"

    # A character was typed in a simpler form (no accent / lowercase)
    SIMPLIFY_ACCENT = "SIMPLIFY_ACCENT"
    SIMPLIFY_CASE = "SIMPLIFY_CASE"

    # Two characters were swapped
    TRANSPOSE_CHAR = "TRANSPOSE_CHAR"

    # The whole word was replaced by a known, common misspelling
    COMMON_TYPO = "COMMON_TYPO"

`NoiseModel`

Class responsible for introducing typos in a clean text.

Most typos are introduced directly in the text. Then fuzzy typing is applied, using two Gaussian distributions (for x-axis and y-axis), mimicking a user typing on a soft keyboard.

The ratio arguments are here to choose how wide the Gaussian distribution is. A wider distribution will be less precise, a narrower distribution will be more precise. To test how wide a ratio is, run the following code :

from scipy.stats import norm

def compute(x):
    cdf = norm.cdf(x)
    return cdf - (1 - cdf)

print(compute(2.32))    # >>> 0.9796591226625606

So in this case, a ratio of 2.32 gives a precision of ~98% (a typo will be introduced in 2% of the cases).

Parameters:

Name	Type	Description	Default
`lang`	`str`	Language used.	required
`custom_keyboard`	`Dict`	If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.	`None`
`common_typos`	`Optional[Dict[str, List[str]]]`	Dictionary of common typos. If `None`, common typos are not used.	`None`
`typo_probs`	`Optional[Dict[str, float]]`	Probabilities for each type of typos. If `None` is given, `DEFAULT_TYPO_PROBS` is used.	`None`
`x_offset`	`float`	Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the x-axis.	`0`
`y_offset`	`float`	Parameter for the Gaussian distribution for the fuzzy typing. Base position offset on the y-axis.	`0`
`x_ratio`	`float`	Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the x-axis, which is the precision of the typing.	`DEFAULT_SIGMA_RATIO`
`y_ratio`	`float`	Parameter for the Gaussian distribution for the fuzzy typing. It controls how wide the distribution is on the y-axis, which is the precision of the typing.	`DEFAULT_SIGMA_RATIO`

Source code in kebbie/noise_model.py

class NoiseModel:
    """Class responsible for introducing typo in a clean text.

    Most of typos are introduced on text directly. Then fuzzy typing is
    applied, using two Gaussian distributions (for x-axis and y-axis),
    mimicking a user typing on a soft keyboard.

    The ratio arguments are here to choose how wide the Gaussian distribution
    is. A wider distribution will be less precise, a narrower distribution will
    be more precise. To test how wide a ratio is, run the following code :
    ```
    from scipy.stats import norm

    def compute(x):
        cdf = norm.cdf(x)
        return cdf - (1 - cdf)

    print(compute(2.32))    # >>> 0.9796591226625606
    ```
    So in this case, a ratio of `2.32` gives a precision of ~98% (a typo will
    be introduced in 2% of the cases).

    Args:
        lang (str): Language used.
        custom_keyboard (Dict, optional): If provided, instead of relying on
            the keyboard layout provided by default, uses the given keyboard
            layout.
        common_typos (Optional[Dict[str, List[str]]], optional): Dictionary of
            common typos. If `None`, common typos are not used.
        typo_probs (Optional[Dict[str, float]], optional): Probabilities for
            each type of typos. If `None` is given, `DEFAULT_TYPO_PROBS` is
            used.
        x_offset (float, optional): Parameter for the Gaussian distribution for
            the fuzzy typing. Base position offset on the x-axis.
        y_offset (float, optional): Parameter for the Gaussian distribution for
            the fuzzy typing. Base position offset on the y-axis.
        x_ratio (float, optional): Parameter for the Gaussian distribution for
            the fuzzy typing. It controls how wide the distribution is on the
            x-axis, which is the precision of the typing.
        y_ratio (float, optional): Parameter for the Gaussian distribution for
            the fuzzy typing. It controls how wide the distribution is on the
            y-axis, which is the precision of the typing.
    """

    def __init__(
        self,
        lang: str,
        custom_keyboard: Dict = None,
        common_typos: Optional[Dict[str, List[str]]] = None,
        typo_probs: Optional[Dict[str, float]] = None,
        x_offset: float = 0,
        y_offset: float = 0,
        x_ratio: float = DEFAULT_SIGMA_RATIO,
        y_ratio: float = DEFAULT_SIGMA_RATIO,
    ):
        self.lang = lang

        # Parameters of the Gaussian distributions used for the fuzzy typing
        self.x_offset = x_offset
        self.y_offset = y_offset
        self.x_ratio = x_ratio
        self.y_ratio = y_ratio

        # Layers with an ID above 3 are not considered by the noise model
        self.klayout = LayoutHelper(self.lang, custom_keyboard=custom_keyboard, ignore_layers_after=3)

        # Fall back to the defaults for anything that was not provided
        if typo_probs is None:
            typo_probs = DEFAULT_TYPO_PROBS
        self.probs = typo_probs

        if common_typos is None:
            common_typos = self._get_common_typos()
        self.common_typos = common_typos

    def type_till_space(
        self,
        words: List[str],
    ) -> Tuple[
        List[Optional[Tuple[float, float]]],
        str,
        int,
        List[Typo],
    ]:
        """Method introducing typos word by word.

        This method receives a list of words, and types these words while
        introducing typos. Typing stops as soon as a space (the separator
        between words) is typed without any typo.
        So most of the time, only one word will be typed and the method will
        return. In some cases, the space is mistyped or deleted, so two (or
        more) words are typed.

        Args:
            words (List[str]): List of words to type. If the list is empty,
                nothing is typed and the number of words typed is 0.

        Returns:
            List of keystrokes (may contains some None).
            The typed characters as string.
            The number of words typed.
            The list of typos introduced in the string typed.
        """
        all_keystrokes = []
        all_typed_char = ""
        all_typos = []

        # Explicit counter (instead of relying on the loop variable), so that
        # the method also works when `words` is empty
        n_words_typed = 0

        for word in words:
            n_words_typed += 1

            # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos
            error_free = not self._is_correctable(word)

            # Add typos in the word
            noisy_word, typos = self._introduce_typos(word, error_free=error_free)
            all_typos += typos

            # Type the word (fuzzy)
            keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)
            all_keystrokes += keystrokes
            all_typed_char += typed_char
            all_typos += typos

            # Then, we try to type a space (separator between words)
            # TODO : Modify this part for languages without space
            noisy_space, sp_typo_1 = self._introduce_typos(SPACE)
            keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)

            # If the space is correctly typed, return now : the space itself
            # is not part of the returned keystrokes / characters
            if not sp_typo_1 and not sp_typo_2:
                break

            # Otherwise keep the mistyped space, and type the next word
            all_keystrokes += keystrokes
            all_typed_char += typed_char
            all_typos += sp_typo_1 + sp_typo_2

        return all_keystrokes, all_typed_char, n_words_typed, all_typos

    def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:
        """Generate an artificial swipe gesture for the given word.

        Args:
            word (str): Word to type with a swipe gesture.

        Returns:
            Positions (x, y) of the generated swipe gesture, or None if the
                swipe gesture couldn't be created (a keystroke is missing,
                or there is less than two keystrokes).
        """
        # Typos are introduced only in words that can be corrected (not in
        # numbers, symbols, etc...)
        error_free = not self._is_correctable(word)

        # Only the (fuzzy) keystrokes matter here
        keystrokes, *_ = self._fuzzy_type(word, error_free=error_free)

        # A gesture needs every keystroke to be known, and at least two of them
        if not all(keystrokes) or len(keystrokes) <= 1:
            return None

        return make_swipe_gesture(keystrokes)

    def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901
        """Method to introduce typos in a given string.

        Either the word is changed into an existing common typo, or the word is
        processed as a stream of characters, each character having a chance of
        being mistyped.
        This method only adds regular typos (deletions, additions, etc...), and
        is not introducing fuzzy typing.

        For each character, the steps are :
         * Potentially simplify it (remove its accent, lowercase it).
         * List the typos that are possible for this character.
         * Sample at most one typo among them, and apply it : a transposition
           swaps the character with the next one, a deletion drops it, an
           addition types it twice.

        Args:
            word (str): Clean string where to add typos.
            error_free (bool): If set to True, don't introduce typo. Defaults
                to False.

        Returns:
            The noisy string.
            The list of typos introduced.
        """
        if error_free:
            return word, []

        # First of all, we either consider the word as a unit and introduce a
        # language-specific common typo (if available), or treat the word as a
        # sequence of character, where each character can have a typo
        if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):
            # Introduce a common typo
            return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]

        # From here, treat the word as a stream of characters, and potentially
        # add typos for each character
        noisy_word = ""
        typos = []
        # Work on a mutable copy of the characters : a transposition modifies
        # the next character to process
        word_chars = list(word)
        for i, char in enumerate(word_chars):
            # First, potentially apply simplifications (removing accent, or
            # lowercasing an uppercase character)
            # Note that if the full word is uppercase, we don't apply lowercase
            # simplification (doesn't feel like a natural typo a user would do)
            if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):
                char = strip_accents(char)
                typos.append(Typo.SIMPLIFY_ACCENT)
            if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):
                char = char.lower()
                typos.append(Typo.SIMPLIFY_CASE)

            # Check if this character exists on our keyboard
            # (the layer with ID 0 is considered as the default keyboard)
            try:
                *_, klayer_id = self.klayout.get_key_info(char)
                char_is_on_kb = True
                char_is_on_default_kb = klayer_id == 0
            except KeyError:
                char_is_on_kb = char_is_on_default_kb = False

            # Then, add the possible typo depending on the character type
            events = []
            is_first_char = bool(i == 0)
            is_last_char = bool(i >= (len(word_chars) - 1))
            if char.isnumeric() or not char_is_on_kb:
                # Don't introduce typos for numbers or symbols that are not on keyboard
                pass
            else:
                if not is_last_char:
                    # Only transpose char if they are on the same keyboard layer
                    try:
                        *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])
                    except KeyError:
                        # Next character is not on the keyboard : no transposition
                        next_char_klayer_id = None

                    if klayer_id == next_char_klayer_id:
                        events.append(Typo.TRANSPOSE_CHAR)
                # Deletion / addition typos depend on the type of character
                if char in self.klayout.spelling_symbols:
                    events.append(Typo.DELETE_SPELLING_SYMBOL)
                    events.append(Typo.ADD_SPELLING_SYMBOL)
                elif char.isspace():
                    events.append(Typo.DELETE_SPACE)
                    events.append(Typo.ADD_SPACE)
                elif char in string.punctuation:
                    events.append(Typo.DELETE_PUNCTUATION)
                    events.append(Typo.ADD_PUNCTUATION)
                elif char_is_on_default_kb:
                    events.append(Typo.DELETE_CHAR)
                    events.append(Typo.ADD_CHAR)

            # If it's the last character (and we are not typing a space),
            # don't add deletions typos, because it's an auto-completion case,
            # not auto-correction
            if is_last_char and word != SPACE:
                events = [e for e in events if e not in DELETIONS]

            # Get the probabilities for these possible events
            typo_probs = {e: self.probs[e] for e in events}
            if is_first_char:
                # Deleting the first character of the word is not so common, update the probabilities accordingly
                typo_probs = {e: p * FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}

            # And sample one of them
            typo = sample_among(typo_probs)

            # Process the typo
            if typo is Typo.TRANSPOSE_CHAR:
                # Type the next character now, and leave the current character
                # in its place (it will be processed at the next iteration)
                noisy_char = word_chars[i + 1]
                word_chars[i + 1] = char
            elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:
                noisy_char = ""
            elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:
                # Additions are made by typing the character twice
                noisy_char = f"{char}{char}"
            else:  # No typo
                noisy_char = char

            noisy_word += noisy_char
            if typo is not None:
                typos.append(typo)

        return noisy_word, typos

    def _fuzzy_type(
        self, word: str, error_free: bool = False
    ) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:
        """Method adding fuzzy typing.

        This method takes a string (potentially already noisy from other type
        of typos), and fuzzy-type it : simulate a user on a soft-keyboard.
        This "fat-finger syndrom" is simulated using two Gaussian
        distributions, one for each axis (x, y).
        This method also returns the generated keystrokes (positions on the
        keyboard), but only for the default keyboard (ID = 0). Keystrokes from
        other keyboard are set to None.

        Args:
            word (str): String to fuzzy-type.
            error_free (bool): If set to True, don't introduce typo. Defaults
                to False.

        Returns:
            List of keystrokes.
            Fuzzy string (corresponding to the keystrokes).
            List of typos introduced.
        """
        fuzzy_word = ""
        keystrokes = []
        typos = []

        # Type word character by character
        for char in word:
            try:
                width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)
            except KeyError:
                # This character doesn't exist on the current keyboard
                # Just type it without introducing typo, like if the user copy-pasted it
                keystrokes.append(None)
                fuzzy_word += char
                continue

            # Sample a keystroke for this character
            # Note that we don't generate typos for characters outside of the default keyboard
            if error_free or klayer_id != 0:
                keystroke = (x_center, y_center)
            else:
                # Compute mu and sigma for the Normal distribution
                x_mu = x_center + self.x_offset
                y_mu = y_center + self.y_offset
                x_sigma = (width / 2) / self.x_ratio
                y_sigma = (height / 2) / self.y_ratio

                # Sample a position (x and y)
                keystroke = (random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))

            # Convert it back to a character, to see where we tapped
            fuzzy_char = self.klayout.get_key(keystroke, klayer_id)

            # Save it (save the keystroke only if part of the default keyboard)
            keystrokes.append(keystroke if klayer_id == 0 else None)
            fuzzy_word += fuzzy_char
            if fuzzy_char != char:
                typos.append(Typo.SUBSTITUTE_CHAR)

        return keystrokes, fuzzy_word, typos

    def _is_correctable(self, word: str) -> bool:
        """Tell whether typos may be introduced in the given word.

        Typos should only be introduced in words that we expect to be
        corrected, otherwise each of them would be counted as an error.

        For now, the only words considered non-correctable are :
         * Non-empty words that don't contain any letter (Unicode category `L`)

        Args:
            word (str): Word to classify as correctable or not.

        Returns:
            `True` if the word is correctable (and therefore typos can be
            introduced), `False` otherwise.
        """
        # `\pL` is the Unicode general category `L` (letters), see
        # https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
        # NOTE(review): `\pL` is not accepted by the standard `re` module, so `re`
        # is presumably the third-party `regex` package here - confirm in the imports.
        letterless = re.match(r"^[^\pL]+$", word)
        return letterless is None

    def _get_common_typos(self) -> Dict[str, List[str]]:
        """Retrieve the list (if it exists) of plausible common typos to use
        when introducing typos.

        The typos are read from a JSON cache file named after the primary
        language (the part of `self.lang` before the first `-`). If the cache
        file doesn't exist, the typos are downloaded, written to the cache,
        and returned. Only English (`en`) has an online source; any other
        language yields an empty dictionary (and nothing is cached).

        Returns:
            Dictionary where the keys are the correct words and the values are
                the associated possible typos for this word.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        plang = self.lang.split("-")[0]
        common_typos_cache_file = os.path.join(CACHE_DIR, f"{plang}.json")

        # Try to access the cached common typos, and if it fails, it means we
        # don't have it locally
        try:
            with open(common_typos_cache_file, "r") as f:
                return json.load(f)
        except FileNotFoundError:
            pass

        # File is not cached, download & process the common typos from online
        os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)
        if plang != "en":
            # No known source of common typos for this language
            return {}

        # Bound the request (never hang forever) and refuse HTTP error pages,
        # so that garbage never ends up parsed and stored in the cache
        response = requests.get(TWEET_TYPO_CORPUS_URL, timeout=30)
        response.raise_for_status()

        # Each line is tab-separated : typo first, correct word second
        typos = defaultdict(list)
        for line in response.text.strip().split("\n"):
            typoed_word, correct_word, *_ = line.split("\t")
            typos[correct_word].append(typoed_word)

        # Save the retrieved typos in cache
        with open(common_typos_cache_file, "w") as f:
            json.dump(typos, f, indent=4)

        # Return a plain dict, like the cached path does (a lookup on a
        # defaultdict would silently create empty entries)
        return dict(typos)

`type_till_space(words)`

Method introducing typos word by word.

This method receives a list of words, and types these words while introducing typos. Most of the time, only one word is typed before the method returns. In some cases, the space is mistyped or deleted, so the following word is typed as well.

Parameters:

Name	Type	Description	Default
`words`	`List[str]`	List of words to type.	required

Returns:

Type	Description
`List[Optional[Tuple[float, float]]]`	List of keystrokes (may contain some `None`).
`str`	The typed characters as string.
`int`	The number of words typed.
`List[Typo]`	The list of typos introduced in the string typed.

Source code in kebbie/noise_model.py

def type_till_space(
    self,
    words: List[str],
) -> Tuple[
    List[Optional[Tuple[float, float]]],
    str,
    int,
    List[Typo],
]:
    """Method introducing typos word by word.

    This method receives a list of words, and types these words while
    introducing typos.
    After each word, a space is typed. If that space is typed without any
    typo, the method returns right away (the correct space itself is not
    part of the returned keystrokes / characters). If the space is mistyped
    or deleted, the noisy space is kept and the next word is typed as well.

    Args:
        words (List[str]): List of words to type.

    Returns:
        List of keystrokes (may contain some None).
        The typed characters as string.
        The number of words typed (0 if `words` is empty).
        The list of typos introduced in the string typed.
    """
    all_keystrokes = []
    all_typed_char = ""
    all_typos = []

    # Explicit counter : relying on the loop variable after the loop crashes
    # when `words` is empty (the variable is never bound)
    n_words_typed = 0

    for word in words:
        n_words_typed += 1

        # Some words can't be corrected (numbers, symbols, etc...) -> Don't introduce typos
        error_free = not self._is_correctable(word)

        # Add typos in the word
        noisy_word, typos = self._introduce_typos(word, error_free=error_free)
        all_typos += typos

        # Type the word (fuzzy)
        keystrokes, typed_char, typos = self._fuzzy_type(noisy_word, error_free=error_free)
        all_keystrokes += keystrokes
        all_typed_char += typed_char
        all_typos += typos

        # Then, we try to type a space (separator between words)
        # TODO : Modify this part for languages without space
        noisy_space, sp_typo_1 = self._introduce_typos(SPACE)
        keystrokes, typed_char, sp_typo_2 = self._fuzzy_type(noisy_space)

        # If the space is correctly typed, return now, otherwise type the next word
        if not sp_typo_1 and not sp_typo_2:
            break

        all_keystrokes += keystrokes
        all_typed_char += typed_char
        all_typos += sp_typo_1 + sp_typo_2

    return all_keystrokes, all_typed_char, n_words_typed, all_typos

`swipe(word)`

Method for creating an artificial swipe gesture given a word.

Parameters:

Name	Type	Description	Default
`word`	`str`	Word to type with a swipe gesture.	required

Returns:

Type	Description
`Optional[List[Tuple[float, float]]]`	Positions (x, y) of the generated swipe gesture, or None if the swipe gesture couldn't be created.

Source code in kebbie/noise_model.py

def swipe(self, word: str) -> Optional[List[Tuple[float, float]]]:
    """Build an artificial swipe gesture for the given word.

    The word is first fuzzy-typed to get one keystroke per character, then
    these keystrokes are turned into a gesture. A gesture can only be built
    if there are at least two keystrokes, and none of them is missing.

    Args:
        word (str): Word to type with a swipe gesture.

    Returns:
        Positions (x, y) of the generated swipe gesture, or None if the
            swipe gesture couldn't be created.
    """
    # Words that can't be corrected (numbers, symbols, etc...) are typed without noise
    taps, *_ = self._fuzzy_type(word, error_free=not self._is_correctable(word))

    # Not swipeable : too short, or at least one keystroke is missing
    if len(taps) <= 1 or not all(taps):
        return None

    return make_swipe_gesture(taps)

`_introduce_typos(word, error_free=False)`

Method to introduce typos in a given string.

Either the word is changed into an existing common typo, or the word is processed as a stream of characters, each character having a chance of being mistyped. This method only adds regular typos (deletions, additions, etc...), and does not introduce fuzzy typing.

Parameters:

Name	Type	Description	Default
`word`	`str`	Clean string where to add typos.	required
`error_free`	`bool`	If set to True, don't introduce typo. Defaults to False.	`False`

Returns:

Type	Description
`str`	The noisy string.
`List[Typo]`	The list of typos introduced.

Source code in kebbie/noise_model.py

def _introduce_typos(self, word: str, error_free: bool = False) -> Tuple[str, List[Typo]]:  # noqa: C901
    """Method to introduce typos in a given string.

    Either the word is changed into an existing common typo, or the word is
    processed as a stream of characters, each character having a chance of
    being mistyped.
    This method only add regular typos (deletions, additions, etc...), and
    is not introducing fuzzy typing.

    Args:
        word (str): Clean string where to add typos.
        error_free (bool): If set to True, don't introduce typo. Defaults
            to False.

    Returns:
        The noisy string.
        The list of typos introduced.
    """
    if error_free:
        return word, []

    # First of all, we either consider the word as a unit and introduce a
    # language-specific common typo (if available), or treat the word as a
    # sequence of character, where each character can have a typo
    if word in self.common_typos and sample(self.probs[Typo.COMMON_TYPO]):
        # Introduce a common typo
        return random.choice(self.common_typos[word]), [Typo.COMMON_TYPO]

    # From here, treat the word as a stream of characters, and potentially
    # add typos for each character
    noisy_word = ""
    typos = []
    word_chars = list(word)
    for i, char in enumerate(word_chars):
        # First, potentially apply simplifications (removing accent, or
        # lowercasing an uppercase character)
        # Note that if the full word is uppercase, we don't apply lowercase
        # simplification (doesn't feel like a natural typo a user would do)
        if char in self.klayout.letter_accents and sample(self.probs[Typo.SIMPLIFY_ACCENT]):
            char = strip_accents(char)
            typos.append(Typo.SIMPLIFY_ACCENT)
        if char.isupper() and len(word) > 1 and not word.isupper() and sample(self.probs[Typo.SIMPLIFY_CASE]):
            char = char.lower()
            typos.append(Typo.SIMPLIFY_CASE)

        # Check if this character exists on our keyboard
        try:
            *_, klayer_id = self.klayout.get_key_info(char)
            char_is_on_kb = True
            char_is_on_default_kb = klayer_id == 0
        except KeyError:
            char_is_on_kb = char_is_on_default_kb = False

        # Then, add the possible typo depending on the character type
        events = []
        is_first_char = bool(i == 0)
        is_last_char = bool(i >= (len(word_chars) - 1))
        if char.isnumeric() or not char_is_on_kb:
            # Don't introduce typos for numbers or symbols that are not on keyboard
            pass
        else:
            if not is_last_char:
                # Only transpose char if they are on the same keyboard layer
                try:
                    *_, next_char_klayer_id = self.klayout.get_key_info(word[i + 1])
                except KeyError:
                    next_char_klayer_id = None

                if klayer_id == next_char_klayer_id:
                    events.append(Typo.TRANSPOSE_CHAR)
            if char in self.klayout.spelling_symbols:
                events.append(Typo.DELETE_SPELLING_SYMBOL)
                events.append(Typo.ADD_SPELLING_SYMBOL)
            elif char.isspace():
                events.append(Typo.DELETE_SPACE)
                events.append(Typo.ADD_SPACE)
            elif char in string.punctuation:
                events.append(Typo.DELETE_PUNCTUATION)
                events.append(Typo.ADD_PUNCTUATION)
            elif char_is_on_default_kb:
                events.append(Typo.DELETE_CHAR)
                events.append(Typo.ADD_CHAR)

        # If it's the last character (and we are not typing a space),
        # don't add deletions typos, because it's an auto-completion case,
        # not auto-correction
        if is_last_char and word != SPACE:
            events = [e for e in events if e not in DELETIONS]

        # Get the probabilities for these possible events
        typo_probs = {e: self.probs[e] for e in events}
        if is_first_char:
            # Deleting the first character of the word is not so common, update the probabilities accordingly
            typo_probs = {e: p * FRONT_DELETION_MULTIPLIER if e in DELETIONS else p for e, p in typo_probs.items()}

        # And sample one of them
        typo = sample_among(typo_probs)

        # Process the typo
        if typo is Typo.TRANSPOSE_CHAR:
            noisy_char = word_chars[i + 1]
            word_chars[i + 1] = char
        elif typo in [Typo.DELETE_SPELLING_SYMBOL, Typo.DELETE_SPACE, Typo.DELETE_PUNCTUATION, Typo.DELETE_CHAR]:
            noisy_char = ""
        elif typo in [Typo.ADD_SPELLING_SYMBOL, Typo.ADD_SPACE, Typo.ADD_PUNCTUATION, Typo.ADD_CHAR]:
            noisy_char = f"{char}{char}"
        else:  # No typo
            noisy_char = char

        noisy_word += noisy_char
        if typo is not None:
            typos.append(typo)

    return noisy_word, typos

`_fuzzy_type(word, error_free=False)`

Method adding fuzzy typing.

This method takes a string (potentially already noisy from other types of typos) and fuzzy-types it: it simulates a user typing on a soft keyboard. This "fat-finger syndrome" is simulated using two Gaussian distributions, one for each axis (x, y). This method also returns the generated keystrokes (positions on the keyboard), but only for the default keyboard (ID = 0). Keystrokes from other keyboards are set to None.

Parameters:

Name	Type	Description	Default
`word`	`str`	String to fuzzy-type.	required
`error_free`	`bool`	If set to True, don't introduce typo. Defaults to False.	`False`

Returns:

Type	Description
`List[Optional[Tuple[float, float]]]`	List of keystrokes.
`str`	Fuzzy string (corresponding to the keystrokes).
`List[Typo]`	List of typos introduced.

Source code in kebbie/noise_model.py

def _fuzzy_type(
    self, word: str, error_free: bool = False
) -> Tuple[List[Optional[Tuple[float, float]]], str, List[Typo]]:
    """Method adding fuzzy typing.

    This method takes a string (potentially already noisy from other type
    of typos), and fuzzy-type it : simulate a user on a soft-keyboard.
    This "fat-finger syndrom" is simulated using two Gaussian
    distributions, one for each axis (x, y).
    This method also returns the generated keystrokes (positions on the
    keyboard), but only for the default keyboard (ID = 0). Keystrokes from
    other keyboard are set to None.

    Args:
        word (str): String to fuzzy-type.
        error_free (bool): If set to True, don't introduce typo. Defaults
            to False.

    Returns:
        List of keystrokes.
        Fuzzy string (corresponding to the keystrokes).
        List of typos introduced.
    """
    fuzzy_word = ""
    keystrokes = []
    typos = []

    # Type word character by character
    for char in word:
        try:
            width, height, x_center, y_center, klayer_id = self.klayout.get_key_info(char)
        except KeyError:
            # This character doesn't exist on the current keyboard
            # Just type it without introducing typo, like if the user copy-pasted it
            keystrokes.append(None)
            fuzzy_word += char
            continue

        # Sample a keystroke for this character
        # Note that we don't generate typos for characters outside of the default keyboard
        if error_free or klayer_id != 0:
            keystroke = (x_center, y_center)
        else:
            # Compute mu and sigma for the Normal distribution
            x_mu = x_center + self.x_offset
            y_mu = y_center + self.y_offset
            x_sigma = (width / 2) / self.x_ratio
            y_sigma = (height / 2) / self.y_ratio

            # Sample a position (x and y)
            keystroke = (random.gauss(x_mu, x_sigma), random.gauss(y_mu, y_sigma))

        # Convert it back to a character, to see where we tapped
        fuzzy_char = self.klayout.get_key(keystroke, klayer_id)

        # Save it (save the keystroke only if part of the default keyboard)
        keystrokes.append(keystroke if klayer_id == 0 else None)
        fuzzy_word += fuzzy_char
        if fuzzy_char != char:
            typos.append(Typo.SUBSTITUTE_CHAR)

    return keystrokes, fuzzy_word, typos

`_is_correctable(word)`

Method returning True if we expect the given word to be corrected upon typo introduction, False otherwise.

This is necessary to ensure we don't introduce typos in words that can't be corrected, because if we do, it will be counted as error.

For now, the following are considered non-correctable:

* Words that don't contain any letter (as defined by the Unicode standard)

Parameters:

Name	Type	Description	Default
`word`	`str`	Word to classify as correctable or not.	required

Returns:

Type	Description
`bool`	True if the word is correctable (and therefore we can introduce typo), False otherwise.

Source code in kebbie/noise_model.py

def _is_correctable(self, word: str) -> bool:
    """Tell whether typos may be introduced in the given word.

    Typos should only be introduced in words that we expect to be
    corrected, otherwise each of them would be counted as an error.

    For now, the only words considered non-correctable are :
     * Non-empty words that don't contain any letter (Unicode category `L`)

    Args:
        word (str): Word to classify as correctable or not.

    Returns:
        `True` if the word is correctable (and therefore typos can be
        introduced), `False` otherwise.
    """
    # `\pL` is the Unicode general category `L` (letters), see
    # https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
    # NOTE(review): `\pL` is not accepted by the standard `re` module, so `re`
    # is presumably the third-party `regex` package here - confirm in the imports.
    letterless = re.match(r"^[^\pL]+$", word)
    return letterless is None

`_get_common_typos()`

Retrieve the list (if it exists) of plausible common typos to use when introducing typos.

Returns:

Type	Description
`Dict[str, List[str]]`	Dictionary where the keys are the correct words and the values are the associated possible typos for this word.

Source code in kebbie/noise_model.py

def _get_common_typos(self) -> Dict[str, List[str]]:
    """Retrieve the list (if it exists) of plausible common typos to use
    when introducing typos.

    The typos are read from a JSON cache file named after the primary
    language (the part of `self.lang` before the first `-`). If the cache
    file doesn't exist, the typos are downloaded, written to the cache,
    and returned. Only English (`en`) has an online source; any other
    language yields an empty dictionary (and nothing is cached).

    Returns:
        Dictionary where the keys are the correct words and the values are
            the associated possible typos for this word.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
    """
    plang = self.lang.split("-")[0]
    common_typos_cache_file = os.path.join(CACHE_DIR, f"{plang}.json")

    # Try to access the cached common typos, and if it fails, it means we
    # don't have it locally
    try:
        with open(common_typos_cache_file, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        pass

    # File is not cached, download & process the common typos from online
    os.makedirs(os.path.dirname(common_typos_cache_file), exist_ok=True)
    if plang != "en":
        # No known source of common typos for this language
        return {}

    # Bound the request (never hang forever) and refuse HTTP error pages,
    # so that garbage never ends up parsed and stored in the cache
    response = requests.get(TWEET_TYPO_CORPUS_URL, timeout=30)
    response.raise_for_status()

    # Each line is tab-separated : typo first, correct word second
    typos = defaultdict(list)
    for line in response.text.strip().split("\n"):
        typoed_word, correct_word, *_ = line.split("\t")
        typos[correct_word].append(typoed_word)

    # Save the retrieved typos in cache
    with open(common_typos_cache_file, "w") as f:
        json.dump(typos, f, indent=4)

    # Return a plain dict, like the cached path does (a lookup on a
    # defaultdict would silently create empty entries)
    return dict(typos)

`oracle.py`

Module defining the Oracle class, which is the class taking care of iterating the dataset, introducing typos using the noise model, and querying the Corrector to correct these typos. Then the scorer is used to compute metrics about the performances, and the results are returned.

`Oracle`

Class that takes care of testing a Corrector. It basically gets clean text data, adds noise to it, sends the noisy data to the Corrector, and scores its output.

This class spawns multiple processes to decrease runtime.

Parameters:

Name	Type	Description	Default
`lang`	`str`	Language used.	required
`test_data`	`Dict[str, List[str]]`	List of clean sentences for each domain.	required
`custom_keyboard`	`Dict`	If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.	required
`track_mistakes`	`bool`	Set to `True` for tracking the most common mistakes. Most common mistakes are added to the results dictionary.	required
`n_most_common_mistakes`	`int`	If `track_mistakes` is set to `True`, the top X mistakes to record.	required
`beta`	`float`	Beta to use for computing the F-beta score.	required

Source code in kebbie/oracle.py

class Oracle:
    """Test harness for a Corrector : takes clean text data, adds noise to
    it, sends the noisy data to the Corrector, and scores what comes back.

    The work is spread over multiple processes to decrease runtime.

    Args:
        lang (str): Language used.
        test_data (Dict[str, List[str]]): List of clean sentences for each
            domain.
        custom_keyboard (Dict): If provided, instead of relying on
            the keyboard layout provided by default, uses the given keyboard
            layout.
        track_mistakes (bool): Set to `True` for tracking the most
            common mistakes. Most common mistakes are added to the results
            dictionary.
        n_most_common_mistakes (int): If `track_mistakes` is set to
            `True`, the top X mistakes to record.
        beta (float): Beta to use for computing the F-beta score.
    """

    def __init__(
        self,
        lang: str,
        test_data: Dict[str, List[str]],
        custom_keyboard: Dict,
        track_mistakes: bool,
        n_most_common_mistakes: int,
        beta: float,
    ) -> None:
        super().__init__()

        # What to test on
        self.lang = lang
        self.data = test_data
        self.custom_keyboard = custom_keyboard

        # How to report
        self.track_mistakes = track_mistakes
        self.n_most_common_mistakes = n_most_common_mistakes
        self.beta = beta

    def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:
        """Run the full test of the given Corrector and return the results
        as a dictionary.

        Sentences are dispatched to a pool of worker processes; the scores
        coming back from the workers are merged into a single Scorer.

        Args:
            corrector (Union[Corrector, List[Corrector]]): Corrector to test.
                If a list of Corrector is given, the argument `n_proc` is
                ignored, and one corrector is assigned for each process.
            n_proc (Optional[int]): Number of processes to use. If `None`,
                `os.cpu_count()` is used.
            seed (int): Seed to use for running the tests.

        Returns:
            Results formatted in a dictionary. If `track_mistakes` is set,
            the key `most_common_mistakes` holds, for each task, a header
            row followed by the most frequent mistakes.
        """
        # Single Scorer on the main process, accumulating the counts of every worker
        scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)

        total_sentences = sum(len(sentences) for sentences in self.data.values())

        # Decide how many workers to start, and which corrector each one gets
        if isinstance(corrector, Corrector):
            # One shared corrector : hand it over to every worker
            worker_count = os.cpu_count() if n_proc is None else n_proc
            per_worker = [corrector] * worker_count
        else:
            # A list of correctors : exactly one worker per corrector
            worker_count = len(corrector)
            per_worker = corrector

        # Each worker pulls its own corrector from this queue at startup
        corrector_queue = mp.Queue()
        for worker_corrector in per_worker:
            corrector_queue.put(worker_corrector)

        worker_args = (tester, self.lang, self.custom_keyboard, corrector_queue, seed, self.track_mistakes)
        with mp.Pool(
            processes=worker_count,
            initializer=init_tester,
            initargs=worker_args,
        ) as pool, tqdm(total=total_sentences) as pbar:
            # Domains are processed one after the other, so that each score
            # coming back can be tagged with the right domain
            for domain, sentences in self.data.items():
                chunk_size = max(1, min(CHUNK_SIZE, len(sentences) // worker_count))
                for sentence_score in pool.imap_unordered(tester, sentences, chunksize=chunk_size):
                    sentence_score.set_domain(domain)
                    scorer.add(sentence_score)
                    pbar.update(1)

        # Compute the metrics from the accumulated counts
        results = scorer.score(beta=self.beta)
        if not self.track_mistakes:
            return results

        # Append the most common mistakes of each task, as rows of a table
        task_names = {"nwp": "next_word_prediction", "acp": "auto_completion", "acr": "auto_correction"}
        mistakes = {}
        for task, task_name in task_names.items():
            mistake_counter = getattr(scorer, f"{task}_mistakes")

            rows = [("Count", "Expected", "Predictions", "Context")]
            for mistake, count in mistake_counter.most_common(self.n_most_common_mistakes):
                rows.append((count, mistake.actual, f"[{', '.join(mistake.preds)}]", mistake.context))
            mistakes[task_name] = rows

        results["most_common_mistakes"] = mistakes
        return results

`test(corrector, n_proc, seed)`

Main method, it tests the given Corrector, and returns results as a dictionary.

This method spawns multiple processes to decrease runtime.

Parameters:

Name	Type	Description	Default
`corrector`	`Union[Corrector, List[Corrector]]`	Corrector to test. If a list of Corrector is given, the argument `n_proc` is ignored, and one corrector is assigned for each process.	required
`n_proc`	`Optional[int]`	Number of processes to use. If `None`, `os.cpu_count()` is used.	required
`seed`	`int`	Seed to use for running the tests.	required

Returns:

Type	Description
`Dict`	Results formatted in a dictionary.

Source code in kebbie/oracle.py

def test(self, corrector: Union[Corrector, List[Corrector]], n_proc: Optional[int], seed: int) -> Dict:
    """Run the full test of the given Corrector and return the results
    as a dictionary.

    Sentences are dispatched to a pool of worker processes; the scores
    coming back from the workers are merged into a single Scorer.

    Args:
        corrector (Union[Corrector, List[Corrector]]): Corrector to test.
            If a list of Corrector is given, the argument `n_proc` is
            ignored, and one corrector is assigned for each process.
        n_proc (Optional[int]): Number of processes to use. If `None`,
            `os.cpu_count()` is used.
        seed (int): Seed to use for running the tests.

    Returns:
        Results formatted in a dictionary. If `track_mistakes` is set,
        the key `most_common_mistakes` holds, for each task, a header
        row followed by the most frequent mistakes.
    """
    # Single Scorer on the main process, accumulating the counts of every worker
    scorer = Scorer(domains=self.data.keys(), track_mistakes=self.track_mistakes)

    total_sentences = sum(len(sentences) for sentences in self.data.values())

    # Decide how many workers to start, and which corrector each one gets
    if isinstance(corrector, Corrector):
        # One shared corrector : hand it over to every worker
        worker_count = os.cpu_count() if n_proc is None else n_proc
        per_worker = [corrector] * worker_count
    else:
        # A list of correctors : exactly one worker per corrector
        worker_count = len(corrector)
        per_worker = corrector

    # Each worker pulls its own corrector from this queue at startup
    corrector_queue = mp.Queue()
    for worker_corrector in per_worker:
        corrector_queue.put(worker_corrector)

    worker_args = (tester, self.lang, self.custom_keyboard, corrector_queue, seed, self.track_mistakes)
    with mp.Pool(
        processes=worker_count,
        initializer=init_tester,
        initargs=worker_args,
    ) as pool, tqdm(total=total_sentences) as pbar:
        # Domains are processed one after the other, so that each score
        # coming back can be tagged with the right domain
        for domain, sentences in self.data.items():
            chunk_size = max(1, min(CHUNK_SIZE, len(sentences) // worker_count))
            for sentence_score in pool.imap_unordered(tester, sentences, chunksize=chunk_size):
                sentence_score.set_domain(domain)
                scorer.add(sentence_score)
                pbar.update(1)

    # Compute the metrics from the accumulated counts
    results = scorer.score(beta=self.beta)
    if not self.track_mistakes:
        return results

    # Append the most common mistakes of each task, as rows of a table
    task_names = {"nwp": "next_word_prediction", "acp": "auto_completion", "acr": "auto_correction"}
    mistakes = {}
    for task, task_name in task_names.items():
        mistake_counter = getattr(scorer, f"{task}_mistakes")

        rows = [("Count", "Expected", "Predictions", "Context")]
        for mistake, count in mistake_counter.most_common(self.n_most_common_mistakes):
            rows.append((count, mistake.actual, f"[{', '.join(mistake.preds)}]", mistake.context))
        mistakes[task_name] = rows

    results["most_common_mistakes"] = mistakes
    return results

`init_tester(fn, lang, custom_keyboard, correctors, seed, track_mistakes)`

Function run at process initialization for Tester workers.

Each worker in a Pool will run this function when created. It will instantiate several things needed for testing the given corrector:

* A Tokenizer to split sentences into words
* A NoiseModel to introduce typos
* A Corrector instance, which is the model we want to test

Parameters:

Name	Type	Description	Default
`fn`	`Callable`	Main tester function (instantiated objects will be attached to this function).	required
`lang`	`str`	Language used.	required
`custom_keyboard`	`Dict`	If provided, instead of relying on the keyboard layout provided by default, uses the given keyboard layout.	required
`correctors`	`Queue`	Queue containing list of correctors to test. Each process will get the next corrector available in queue.	required
`seed`	`int`	Base seed to use.	required
`track_mistakes`	`bool`	Set to `True` for tracking the most common mistakes.	required

Source code in kebbie/oracle.py

def init_tester(
    fn: Callable, lang: str, custom_keyboard: Dict, correctors: mp.Queue, seed: int, track_mistakes: bool
) -> None:
    """Prepare a Tester worker process.

    This is meant to be run once per worker when a Pool is created. Everything
    the tester function needs is stored as an attribute of the function object
    itself:
     * `tokenizer`: a Tokenizer to split sentences into words
     * `noisy`: a NoiseModel to introduce typos
     * `corrector`: the Corrector instance to test
     * `base_seed`: the base seed
     * `track_mistakes`: whether the most common mistakes are tracked

    Args:
        fn (Callable): Main tester function (the objects created here are
            attached to this function).
        lang (str): Language used.
        custom_keyboard (Dict): Keyboard layout forwarded to the noise model,
            to be used instead of the default layout.
        correctors (mp.Queue): Queue of correctors to test. Each process takes
            the next corrector available in the queue.
        seed (int): Base seed to use.
        track_mistakes (bool): Set to `True` for tracking the most common
            mistakes.
    """
    # Plain configuration values, copied as-is
    fn.track_mistakes = track_mistakes
    fn.base_seed = seed

    # This worker's own corrector, taken from the shared queue
    worker_corrector = correctors.get()
    fn.corrector = worker_corrector

    # Helper objects built locally in each worker
    noise_model = NoiseModel(lang, custom_keyboard=custom_keyboard)
    fn.noisy = noise_model
    fn.tokenizer = BasicTokenizer()

`tester(sentence)`

Function to test a given sentence.

It uses the noise model to introduce typos word by word, run the Corrector on various tasks (auto-completion, auto-correction, next-word prediction), and score the results.

Parameters:

Name	Type	Description	Default
`sentence`	`str`	Sentence to use as data for the test.	required

Returns:

Type	Description
`Scorer`	Scorer class with the prediction counts for this sentence.

Source code in kebbie/oracle.py

def tester(sentence: str) -> Scorer:
    """Function to test a given sentence.

    It uses the noise model to introduce typos word by word, run the
    Corrector on various tasks (swipe resolution, auto-completion,
    auto-correction, next-word prediction), and score the results.

    The tokenizer, noise model, corrector, base seed and mistake-tracking
    flag are read from attributes of this very function (`tester.tokenizer`,
    `tester.noisy`, `tester.corrector`, `tester.base_seed`,
    `tester.track_mistakes`), so they must be attached before calling it.

    Args:
        sentence (str): Sentence to use as data for the test.

    Returns:
        Scorer class with the prediction counts for this sentence. The counts
        are recorded without a domain (`None` key).
    """
    # Set the seed for reproducibility, using the hash of the sentence
    # (sha256 rather than `hash()`, which is not stable across processes)
    hsh = int(hashlib.sha256(sentence.encode("utf-8")).hexdigest(), 16)
    random.seed(tester.base_seed + hsh)
    rnd_state = random.getstate()

    # Tokenize the sentence into words
    sentence = tester.tokenizer.preprocess(sentence)
    words = tester.tokenizer.word_split(sentence)

    context = ""
    # Keep track for predictions counts with a local scorer, for this sentence
    scorer = Scorer(domains=[None], track_mistakes=tester.track_mistakes)
    # Stop when all words are consumed, or when the context gets too long
    while words and len(context) < MAX_CHAR_PER_SENTENCE:
        # Before randomly generating typo, set the random state for determinism
        # (this is the state saved right after the previous typo generation)
        random.setstate(rnd_state)

        # It's slow to generate swipe gesture every sentence, so run it just sometimes
        # NOTE(review): `sample` presumably returns True with probability SWIPE_PROB - confirm
        word_to_swipe = words[0]
        swipe_gesture = tester.noisy.swipe(word_to_swipe) if sample(SWIPE_PROB) else None

        # Generate noisy keystrokes for the next word(s)
        keystrokes, typed_word, n_word_typed, typos = tester.noisy.type_till_space(words)

        # Get the clean word(s), update the remaining words to type and get the next word
        actual_word = " ".join(words[:n_word_typed])
        words = words[n_word_typed:]
        next_word = words[0] if len(words) > 0 else None

        # We are done with generating typo, save the random state for the next iteration
        rnd_state = random.getstate()

        # A falsy gesture (None, or an empty result from the noise model) skips the swipe task
        if swipe_gesture:
            # Call the swipe model
            preds, memory, runtime = tester.corrector.profiled_resolve_swipe(context, swipe_gesture)
            scorer.swp(word_to_swipe, preds, context=context, memory=memory, runtime=runtime)

        # Call the model for auto-completion (for long enough words)
        if len(typed_word) > 1 and len(actual_word) > 1:
            partial_keystrokes, partial_word = sample_partial_word(keystrokes, typed_word, actual_word)
            preds, memory, runtime = tester.corrector.profiled_auto_complete(context, partial_keystrokes, partial_word)
            scorer.acp(actual_word, preds, partial_word=partial_word, context=context, memory=memory, runtime=runtime)

        # Call the model for auto-correction
        preds, memory, runtime = tester.corrector.profiled_auto_correct(context, keystrokes, typed_word)
        scorer.acr(
            actual_word, preds, typed_word=typed_word, context=context, typos=typos, memory=memory, runtime=runtime
        )

        # Update the context for the next iteration (input forcing) : the clean
        # word is appended, not the word typed nor the model's prediction
        context = tester.tokenizer.update_context(context, actual_word)

        # Call the model for next-word prediction (skipped after the last word)
        if next_word:
            preds, memory, runtime = tester.corrector.profiled_predict_next_word(context)
            scorer.nwp(next_word, preds, context=context, memory=memory, runtime=runtime)

    return scorer

`scorer.py`

Module implementing Scorer, a class that keeps track of how many errors the model is making, and outputs various corresponding metrics.

`Count` `dataclass`

Structure representing the most basic counts for a task.

It counts : * Number of correct predictions * Number of top3-correct predictions * Total number of predictions

Source code in kebbie/scorer.py

@dataclass
class Count:
    """Basic prediction counters for a single task.

    Three numbers are tracked :
    * How many times the first prediction was correct
    * How many times one of the top-3 predictions was correct
    * How many predictions were made in total
    """

    correct: int = 0  # First prediction was the right one
    correct_3: int = 0  # Right word was among the top-3 predictions
    total: int = 0  # Number of predictions recorded

    def __add__(self, count: Count) -> Count:
        """Combine this `Count` with another one, field by field.

        Args:
            count (Count): Count instance to add.

        Returns:
            A new Count holding the summed counters (operands are untouched).
        """
        merged = Count()
        merged.correct = self.correct + count.correct
        merged.correct_3 = self.correct_3 + count.correct_3
        merged.total = self.total + count.total
        return merged

    def __mul__(self, proportion: float) -> Count:
        """Scale every counter of this `Count` by the given proportion.

        Args:
            proportion (float): Proportion to multiply by.

        Returns:
            A new Count where each counter is the rounded, scaled value.
        """
        # Each counter is rounded on its own after scaling
        scaled = [round(value * proportion) for value in (self.correct, self.correct_3, self.total)]
        return Count(*scaled)

`add(count)`

Merge two Count instances by adding their counts.

Parameters:

Name	Type	Description	Default
`count`	`Count`	Count instance to add.	required

Returns:

Type	Description
`Count`	Merged Count.

Source code in kebbie/scorer.py

def __add__(self, count: Count) -> Count:
    """Combine this `Count` with another one, field by field.

    Args:
        count (Count): Count instance to add.

    Returns:
        A new Count holding the summed counters.
    """
    summed = {
        name: getattr(self, name) + getattr(count, name)
        for name in ("correct", "correct_3", "total")
    }
    return Count(**summed)

`mul(proportion)`

Multiply the current Count instance by a given proportion.

Parameters:

Name	Type	Description	Default
`proportion`	`float`	Proportion to multiply by.	required

Returns:

Type	Description
`Count`	Count with the right proportion.

Source code in kebbie/scorer.py

def __mul__(self, proportion: float) -> Count:
    """Scale every counter of this `Count` by the given proportion.

    Args:
        proportion (float): Proportion to multiply by.

    Returns:
        A new Count where each counter is the rounded, scaled value.
    """
    # Each counter is scaled, then rounded, independently of the others
    scaled_correct = round(self.correct * proportion)
    scaled_correct_3 = round(self.correct_3 * proportion)
    scaled_total = round(self.total * proportion)
    return Count(scaled_correct, scaled_correct_3, scaled_total)

`Mistake` `dataclass`

Structure representing a mistake (including the context of the mistake, the expected word and the predictions).

Source code in kebbie/scorer.py

@dataclass(frozen=True)
class Mistake:
    """Record of a single wrong prediction : the word that was expected, what
    the model predicted instead, and the context in which it happened.

    Instances are immutable and hashable. Only the expected word takes part
    in equality and hashing, so two mistakes on the same word are considered
    the same mistake, whatever their predictions and context.
    """

    # Expected word (the only field used for comparison and hashing)
    actual: str
    # Predictions of the model (ignored when comparing)
    preds: List[str] = field(compare=False)
    # Context of the mistake (ignored when comparing)
    context: str = field(compare=False)

`Scorer`

Class keeping track of the predictions and how correct they are, but also computing the associated score for each task after the end of test.

Parameters:

Name	Type	Description	Default
`domains`	`List[str]`	The list of domains in the dataset. The Scorer keeps track of the score for each domain, so that we can spot discrepancies between domains, if any.	required
`human_readable`	`bool`	If set to `False`, performance metrics (memory, runtime) are kept in their raw, numeral form. If set to `True`, these are converted to a human readable string.	`True`
`track_mistakes`	`bool`	Set to `True` for tracking the most common mistakes.	`False`

Source code in kebbie/scorer.py

class Scorer:
    """Class keeping track of the predictions and how correct they are, but
    also computing the associated score for each task after the end of test.

    Four tasks are tracked : next-word prediction (`nwp`), auto-completion
    (`acp`), auto-correction (`acr`) and swipe resolution (`swp`). For each of
    them the Scorer stores prediction counts, memory usages, runtimes and
    (optionally) the mistakes made.

    Args:
        domains (List[str]): The list of domains in the dataset. The Scorer
            keeps track of the score for each domain, so that we can spot
            discrepancies between domains, if any.
        human_readable (bool, optional): If set to `False`, performance metrics
            (memory, runtime) are kept in their raw, numerical form. If set to
            `True`, these are converted to a human readable string.
        track_mistakes (bool, optional): Set to `True` for tracking the most
            common mistakes.
    """

    def __init__(self, domains: List[str], human_readable: bool = True, track_mistakes: bool = False) -> None:
        """Create the (empty) counters and lists for each task."""
        self.human_readable = human_readable

        # For each task, create a dictionary of Counts
        # Each task has a different structure :
        # NOTE(review): `dd_x_layers(n)` presumably builds n nested layers of
        # defaultdict ending with `Count` - confirm against its definition

        # Next-word prediction : [domain] -> counts
        self.nwp_c = dd_x_layers(1)

        # Autocompletion : [domain] -> [typo/no_typo] -> [word_completion_rate] -> counts
        self.acp_c = dd_x_layers(3)

        # Autocorrection : [domain] -> [typo type/number of typo] -> counts
        self.acr_c = dd_x_layers(2)

        # Swipe resolution : [domain] -> counts
        self.swp_c = dd_x_layers(1)

        # Make sure we track each domain (create a 0-Count for each domain)
        # The values are only accessed for the side effect of creating the keys
        for d in domains:
            _ = self.nwp_c[d], self.acp_c[d][WITH_TYPO][0], self.acr_c[d][None], self.swp_c[d]

        # Also keep track of memories & runtimes
        self.nwp_memories = []
        self.acp_memories = []
        self.acr_memories = []
        self.swp_memories = []
        self.nwp_runtimes = []
        self.acp_runtimes = []
        self.acr_runtimes = []
        self.swp_runtimes = []

        # Optionally track common mistakes
        # (the counters always exist, but are only filled if `track_mistakes` is set)
        self.track_mistakes = track_mistakes
        self.nwp_mistakes = Counter()
        self.acp_mistakes = Counter()
        self.acr_mistakes = Counter()
        self.swp_mistakes = Counter()

    def add(self, scorer: "Scorer") -> None:
        """Method to update the current Scorer with the counts from another
        Scorer.

        Counts are summed, memories and runtimes are appended, and mistakes
        counters are merged. The given Scorer is not modified.

        Args:
            scorer (Scorer): Scorer to add.
        """

        def update(d1, d2):
            # Recursively walk the nested dictionaries of `d2`, and add each
            # `Count` found to the `Count` at the same position in `d1`
            for k in d2:
                if isinstance(d2[k], Count):
                    d1[k] += d2[k]
                else:
                    update(d1[k], d2[k])

        update(self.nwp_c, scorer.nwp_c)
        update(self.acp_c, scorer.acp_c)
        update(self.acr_c, scorer.acr_c)
        update(self.swp_c, scorer.swp_c)
        self.nwp_memories.extend(scorer.nwp_memories)
        self.acp_memories.extend(scorer.acp_memories)
        self.acr_memories.extend(scorer.acr_memories)
        self.swp_memories.extend(scorer.swp_memories)
        self.nwp_runtimes.extend(scorer.nwp_runtimes)
        self.acp_runtimes.extend(scorer.acp_runtimes)
        self.acr_runtimes.extend(scorer.acr_runtimes)
        self.swp_runtimes.extend(scorer.swp_runtimes)
        self.nwp_mistakes.update(scorer.nwp_mistakes)
        self.acp_mistakes.update(scorer.acp_mistakes)
        self.acr_mistakes.update(scorer.acr_mistakes)
        self.swp_mistakes.update(scorer.swp_mistakes)

    def nwp(
        self,
        true_word: str,
        predicted_words: List[str],
        context: str,
        memory: int,
        runtime: int,
        domain: Optional[str] = None,
    ) -> None:
        """Method used to record a prediction for the next-word prediction
        task.

        Args:
            true_word (str): The label (clean word to predict).
            predicted_words (List[str]): Predictions of the model.
            context (str): The context (previous words in the sentence).
            memory (int): Memory consumption for the call of the model.
                Negative values are not recorded.
            runtime (int): Runtime for the call of the model. Negative values
                are not recorded.
            domain (str): Domain of this prediction.
        """
        # Record memory & runtime
        if memory >= 0:
            self.nwp_memories.append(memory)
        if runtime >= 0:
            self.nwp_runtimes.append(runtime)

        # Record counts
        if len(predicted_words) > 0 and predicted_words[0] == true_word:
            self.nwp_c[domain].correct += 1
        if true_word in predicted_words[:3]:
            self.nwp_c[domain].correct_3 += 1
        else:
            # If the word is not in the top-3 predictions, this is a mistake
            if self.track_mistakes:
                self.nwp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])

        self.nwp_c[domain].total += 1

    def acp(
        self,
        true_word: str,
        predicted_words: List[str],
        partial_word: str,
        context: str,
        memory: int,
        runtime: int,
        domain: Optional[str] = None,
    ) -> None:
        """Method used to record a prediction for the auto-completion task.

        Args:
            true_word (str): The label (clean word to predict). Should not be
                empty, since its length is used to compute the completion rate.
            predicted_words (List[str]): Predictions of the model.
            partial_word (str): The input sent to the model (only part of the
                word to predict, with potential typos).
            context (str): The context (previous words in the sentence).
            memory (int): Memory consumption for the call of the model.
                Negative values are not recorded.
            runtime (int): Runtime for the call of the model. Negative values
                are not recorded.
            domain (str): Domain of this prediction.
        """
        # Record memory & runtime
        if memory >= 0:
            self.acp_memories.append(memory)
        if runtime >= 0:
            self.acp_runtimes.append(runtime)

        # Check if a typo was introduced or not
        # (no typo if what was typed is a prefix of the word to predict)
        has_typo = WITHOUT_TYPO if true_word.startswith(partial_word) else WITH_TYPO

        # Compute the completion rate
        # (proportion of the word already typed, rounded to 2 decimals)
        completion_rate = round(len(partial_word) / len(true_word), 2)

        # Record counts
        if len(predicted_words) > 0 and predicted_words[0] == true_word:
            self.acp_c[domain][has_typo][completion_rate].correct += 1
        if true_word in predicted_words[:3]:
            self.acp_c[domain][has_typo][completion_rate].correct_3 += 1
        else:
            # If the word is not in the top-3 predictions, this is a mistake
            if self.track_mistakes:
                self.acp_mistakes.update(
                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f"{context}{partial_word}")]
                )

        self.acp_c[domain][has_typo][completion_rate].total += 1

    def acr(
        self,
        true_word: str,
        predicted_words: List[str],
        typed_word: str,
        context: str,
        typos: List[Typo],
        memory: int,
        runtime: int,
        domain: Optional[str] = None,
    ) -> None:
        """Method used to record a prediction for the auto-correction task.

        Args:
            true_word (str): The label (clean word to predict).
            predicted_words (List[str]): Predictions of the model.
            typed_word (str): The word typed, containing potential typos.
            context (str): The context (previous words in the sentence).
            typos (List[Typo]): List of typos introduced.
            memory (int): Memory consumption for the call of the model.
                Negative values are not recorded.
            runtime (int): Runtime for the call of the model. Negative values
                are not recorded.
            domain (str): Domain of this prediction.
        """
        # Record memory & runtime
        if memory >= 0:
            self.acr_memories.append(memory)
        if runtime >= 0:
            self.acr_runtimes.append(runtime)

        # Get the type of typo
        # The key is `None` without typo, the typo itself if there is exactly
        # one, and the number of typos if there are several
        if not typos:
            typo_type = None
        elif len(typos) == 1:
            typo_type = typos[0]
        else:
            typo_type = len(typos)

        # Record counts
        if len(predicted_words) > 0 and predicted_words[0] == true_word:
            self.acr_c[domain][typo_type].correct += 1
        if true_word in predicted_words[:3]:
            self.acr_c[domain][typo_type].correct_3 += 1
        else:
            # If the word is not in the top-3 predictions, this is a mistake
            if self.track_mistakes:
                self.acr_mistakes.update(
                    [Mistake(actual=true_word, preds=predicted_words[:3], context=f"{context}{typed_word}")]
                )

        self.acr_c[domain][typo_type].total += 1

    def swp(
        self,
        true_word: str,
        predicted_words: List[str],
        context: str,
        memory: int,
        runtime: int,
        domain: Optional[str] = None,
    ) -> None:
        """Method used to record a prediction for the swipe resolution task.

        Args:
            true_word (str): The label (clean word to predict).
            predicted_words (List[str]): Predictions of the model.
            context (str): The context (previous words in the sentence).
            memory (int): Memory consumption for the call of the model.
                Negative values are not recorded.
            runtime (int): Runtime for the call of the model. Negative values
                are not recorded.
            domain (str): Domain of this prediction.
        """
        # Record memory & runtime
        if memory >= 0:
            self.swp_memories.append(memory)
        if runtime >= 0:
            self.swp_runtimes.append(runtime)

        # Record counts
        if len(predicted_words) > 0 and predicted_words[0] == true_word:
            self.swp_c[domain].correct += 1
        if true_word in predicted_words[:3]:
            self.swp_c[domain].correct_3 += 1
        else:
            # If the word is not in the top-3 predictions, this is a mistake
            if self.track_mistakes:
                self.swp_mistakes.update([Mistake(actual=true_word, preds=predicted_words[:3], context=context)])

        self.swp_c[domain].total += 1

    def set_domain(self, domain: str) -> None:
        """Method setting the domain for the scores associated with no domain.

        To make it easier to score a single sentence, it's possible to call the
        scorer without a domain (see signature of `nwp()`, `acp()`, `acr()`,
        `swp()`). In this case the scores are associated to no domain (`None`
        key). This method allows the user to set the domain name for these
        scores with no domain (effectively moving the `None` domain scores to
        the given domain name).

        Only the counts are moved : memories, runtimes and mistakes are not
        stored per domain.

        Note:
            If some scores were already linked to the given domain, these
            scores will be erased (replaced by the scores of the `None`
            domain).

        Args:
            domain (str): Domain name to associate the scores to.
        """
        if None in self.nwp_c:
            self.nwp_c[domain] = self.nwp_c.pop(None)
        if None in self.acp_c:
            self.acp_c[domain] = self.acp_c.pop(None)
        if None in self.acr_c:
            self.acr_c[domain] = self.acr_c.pop(None)
        if None in self.swp_c:
            self.swp_c[domain] = self.swp_c.pop(None)

    def _score_accuracy(self, c: Count) -> Dict:
        """Helper method to compute the accuracy given a prediction count.

        This method return a dictionary with 3 metrics :
         * Accuracy
         * Top3 accuracy
         * Total number of predictions

        Both accuracies are 0 if the count is empty (no prediction).

        Args:
            c (Count): Count object to use to compute the accuracy.

        Returns:
            Dictionary with the computed metrics.
        """
        return {
            "accuracy": round_to_n(c.correct / c.total) if c.total != 0 else 0,
            "top3_accuracy": round_to_n(c.correct_3 / c.total) if c.total != 0 else 0,
            "n": c.total,
        }

    def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:
        """Helper method to compute the precision and recall for
        auto-correction.

        This method return a dictionary with several metrics :
         * Accuracy
         * Precision
         * Recall
         * F-score
         * Top3 accuracy
         * Top3 precision
         * Top3 recall
         * Top3 F-score
         * Number of predictions with a typo
         * Total number of predictions

        For auto-correction, we need 2 Count objects : the counts of typos, and
        the counts of non-typo (to compute the True Negative and False Positive
        metrics).

        Args:
            no_typo_c (Count): Count object for the predictions where no typo
                were added.
            typo_c (Count): Count object for the predictions where typos were
                added.
            beta (float): Beta to use for computing the F-beta score.

        Returns:
            Dictionary with the computed metrics.
        """
        # The first step is to divide the counts into TN, FP, TP, FN
        # Words without typo : correct prediction = TN, wrong prediction = FP
        # Words with typo : correct prediction = TP, wrong prediction = FN
        tn = no_typo_c.correct
        fp = no_typo_c.total - no_typo_c.correct
        tp = typo_c.correct
        fn = typo_c.total - typo_c.correct

        # Same thing, but considering the top-3 predictions
        tn_3 = no_typo_c.correct_3
        fp_3 = no_typo_c.total - no_typo_c.correct_3
        tp_3 = typo_c.correct_3
        fn_3 = typo_c.total - typo_c.correct_3

        # Then we compute the metrics
        p = precision(tp=tp, fp=fp)
        r = recall(tp=tp, fn=fn)

        p_3 = precision(tp=tp_3, fp=fp_3)
        r_3 = recall(tp=tp_3, fn=fn_3)

        return {
            "accuracy": round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),
            "precision": round_to_n(p),
            "recall": round_to_n(r),
            "fscore": round_to_n(fbeta(precision=p, recall=r, beta=beta)),
            "top3_accuracy": round_to_n(accuracy(tp=tp_3, tn=tn_3, fp=fp_3, fn=fn_3)),
            "top3_precision": round_to_n(p_3),
            "top3_recall": round_to_n(r_3),
            "top3_fscore": round_to_n(fbeta(precision=p_3, recall=r_3, beta=beta)),
            "n_typo": typo_c.total,
            "n": no_typo_c.total + typo_c.total,
        }

    def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:
        """Helper method to compute metrics related to the memory & runtime.

        This method returns a dictionary with several metrics :
         * The mean memory consumption
         * The min memory consumption
         * The max memory consumption
         * The mean running time
         * The fastest running time
         * The slowest running time

        Each metric is 0 if the corresponding list is empty. If the Scorer was
        created with `human_readable=True`, the values are converted to human
        readable strings.

        Args:
            memories (List[int]): List of memory consumptions for a
                specific operation.
            runtimes (List[int]): List of runtimes for a specific operation.

        Returns:
            Dictionary with the computed metrics.
        """
        perf = {
            "mean_memory": stats.mean(memories) if memories else 0,
            "min_memory": min(memories) if memories else 0,
            "max_memory": max(memories) if memories else 0,
            "mean_runtime": stats.mean(runtimes) if runtimes else 0,
            "fastest_runtime": min(runtimes) if runtimes else 0,
            "slowest_runtime": max(runtimes) if runtimes else 0,
        }

        if self.human_readable:
            # Keys ending with "memory" are memory metrics, others are runtimes
            perf = {
                name: human_readable_memory(x) if name.endswith("memory") else human_readable_runtime(x)
                for name, x in perf.items()
            }

        return perf

    def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901
        """Method that computes the final scores (as well as some alternative
        metrics that can bring insight in the capabilities of the model), and
        output these in an organized dictionary.

        The dictionary has one entry per task (`next_word_prediction`,
        `auto_completion`, `auto_correction`, `swipe_resolution`), plus an
        `overall_score` entry computed from the results of the tasks.

        Args:
            beta (float, optional): Beta to use for computing the F-beta score.

        Returns:
            Dictionary containing the computed scores and metrics for the
            model tested.
        """
        # --- Next-word prediction ---
        # Group scores by domain
        per = defaultdict(Count)
        for domain, c in self.nwp_c.items():
            per[domain] += c
        total_c = sum(per.values(), Count())
        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

        # Task results
        nwp = {
            "score": self._score_accuracy(total_c),
            "per_domain": per_domain,
            "performances": self._score_performances(self.nwp_memories, self.nwp_runtimes),
        }

        # --- Auto-completion ---
        # Group scores by domain
        per = defaultdict(Count)
        for domain, d1 in self.acp_c.items():
            for has_typo, d2 in d1.items():
                for compl_rate, c in d2.items():
                    per[domain] += c
        total_c = sum(per.values(), Count())
        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

        # Group scores by completion rate
        per = defaultdict(Count)
        for domain, d1 in self.acp_c.items():
            for has_typo, d2 in d1.items():
                for compl_rate, c in d2.items():
                    per[compl_rate] += c
        # Completion rates are then gathered in 4 buckets
        per_compl_rate = {
            "<25%": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),
            "25%~50%": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),
            "50%~75%": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),
            ">75%": self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= k), Count())),
        }

        # Group scores by with_typo / without_typo
        per = defaultdict(Count)
        for domain, d1 in self.acp_c.items():
            for has_typo, d2 in d1.items():
                for compl_rate, c in d2.items():
                    per[has_typo] += c
        # `per` is a defaultdict, so a missing key gives an empty Count
        per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}

        # Task results
        acp = {
            "score": self._score_accuracy(total_c),
            "per_domain": per_domain,
            "per_completion_rate": per_compl_rate,
            "per_other": per_other,
            "performances": self._score_performances(self.acp_memories, self.acp_runtimes),
        }

        # --- Auto-correction ---
        # Group scores by domain
        no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)
        for domain, d1 in self.acr_c.items():
            for typo, c in d1.items():
                if typo is None:
                    no_typo_per[domain] += c
                else:
                    typo_per[domain] += c
        no_typo_total_c = sum(no_typo_per.values(), Count())
        typo_total_c = sum(typo_per.values(), Count())
        # Only the domains having a no-typo entry are reported here
        per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}

        # Group scores by typo type
        no_typo_c, typo_per = Count(), defaultdict(Count)
        for domain, d1 in self.acr_c.items():
            for typo, c in d1.items():
                if typo is None:
                    no_typo_c += c
                else:
                    typo_per[typo] += c
        # Divide the total count of no-typo into each type of typos with the right proportions
        no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})
        per_typo_type = {t.name: self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}
        # Keys of `typo_per` are either a `Typo` (single typo) or an int (number of typos)
        per_n_typo = {
            "1": self._score_precision_recall(
                sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),
                sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),
                beta=beta,
            ),
            "2": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),
            "3+": self._score_precision_recall(
                sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),
                sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),
                beta=beta,
            ),
        }

        # Task results
        acr = {
            "score": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),
            "per_domain": per_domain,
            "per_typo_type": per_typo_type,
            "per_number_of_typos": per_n_typo,
            "performances": self._score_performances(self.acr_memories, self.acr_runtimes),
        }

        # --- Swipe resolution ---
        # Group scores by domain
        per = defaultdict(Count)
        for domain, c in self.swp_c.items():
            per[domain] += c
        total_c = sum(per.values(), Count())
        per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

        # Task results
        swp = {
            "score": self._score_accuracy(total_c),
            "per_domain": per_domain,
            "performances": self._score_performances(self.swp_memories, self.swp_runtimes),
        }

        # Final results
        results = {
            "next_word_prediction": nwp,
            "auto_completion": acp,
            "auto_correction": acr,
            "swipe_resolution": swp,
        }

        # Add the overall score
        results["overall_score"] = one_score(results)

        return results

`add(scorer)`

Method to update the current Scorer with the counts from another Scorer.

Parameters:

Name	Type	Description	Default
`scorer`	`Scorer`	Scorer to add.	required

Source code in kebbie/scorer.py

def add(self, scorer) -> None:
    """Merge the content of another Scorer into this one.

    For each task, the counts are summed, the memories and runtimes are
    appended, and the mistakes counters are merged.

    Args:
        scorer (Scorer): Scorer to add.
    """
    tasks = ("nwp", "acp", "acr", "swp")

    def merge_counts(target, source):
        # Go down the nested dictionaries until a `Count` is reached, then
        # add it to the `Count` stored at the same place in the target
        for key, value in source.items():
            if isinstance(value, Count):
                target[key] += value
            else:
                merge_counts(target[key], value)

    # Prediction counts
    for task in tasks:
        merge_counts(getattr(self, f"{task}_c"), getattr(scorer, f"{task}_c"))

    # Performance measurements
    for task in tasks:
        getattr(self, f"{task}_memories").extend(getattr(scorer, f"{task}_memories"))
    for task in tasks:
        getattr(self, f"{task}_runtimes").extend(getattr(scorer, f"{task}_runtimes"))

    # Mistakes
    for task in tasks:
        getattr(self, f"{task}_mistakes").update(getattr(scorer, f"{task}_mistakes"))

`nwp(true_word, predicted_words, context, memory, runtime, domain=None)`

Method used to record a prediction for the next-word prediction task.

Parameters:

Name	Type	Description	Default
`true_word`	`str`	The label (clean word to predict).	required
`predicted_words`	`List[str]`	Predictions of the model.	required
`context`	`str`	The context (previous words in the sentence).	required
`memory`	`int`	Memory consumption for the call of the model.	required
`runtime`	`int`	Runtime for the call of the model.	required
`domain`	`str`	Domain of this prediction.	`None`

Source code in kebbie/scorer.py

def nwp(
    self,
    true_word: str,
    predicted_words: List[str],
    context: str,
    memory: int,
    runtime: int,
    domain: Optional[str] = None,
) -> None:
    """Register the outcome of one next-word prediction.

    Args:
        true_word (str): The label (clean word to predict).
        predicted_words (List[str]): Predictions of the model.
        context (str): The context (previous words in the sentence).
        memory (int): Memory consumption for the call of the model.
        runtime (int): Runtime for the call of the model.
        domain (str): Domain of this prediction.
    """
    # Negative measurements are not recorded
    if memory >= 0:
        self.nwp_memories.append(memory)
    if runtime >= 0:
        self.nwp_runtimes.append(runtime)

    # Work out the outcome of the prediction first
    is_top1 = len(predicted_words) > 0 and predicted_words[0] == true_word
    top3 = predicted_words[:3]
    is_top3 = true_word in top3

    # A word missing from the top-3 predictions is a mistake
    if not is_top3 and self.track_mistakes:
        self.nwp_mistakes.update([Mistake(actual=true_word, preds=top3, context=context)])

    # Then update the counts of the domain
    counts = self.nwp_c[domain]
    if is_top1:
        counts.correct += 1
    if is_top3:
        counts.correct_3 += 1
    counts.total += 1

`acp(true_word, predicted_words, partial_word, context, memory, runtime, domain=None)`

Method used to record a prediction for the auto-completion task.

Parameters:

Name	Type	Description	Default
`true_word`	`str`	The label (clean word to predict).	required
`predicted_words`	`List[str]`	Predictions of the model.	required
`partial_word`	`str`	The input sent to the model (only part of the word to predict, with potential typos).	required
`context`	`str`	The context (previous words in the sentence).	required
`memory`	`int`	Memory consumption for the call of the model.	required
`runtime`	`int`	Runtime for the call of the model.	required
`domain`	`str`	Domain of this prediction.	`None`

Source code in kebbie/scorer.py

def acp(
    self,
    true_word: str,
    predicted_words: List[str],
    partial_word: str,
    context: str,
    memory: int,
    runtime: int,
    domain: Optional[str] = None,
) -> None:
    """Register the outcome of one auto-completion prediction.

    Args:
        true_word (str): The label (clean word to predict).
        predicted_words (List[str]): Predictions of the model.
        partial_word (str): The input sent to the model (only part of the
            word to predict, with potential typos).
        context (str): The context (previous words in the sentence).
        memory (int): Memory consumption for the call of the model.
        runtime (int): Runtime for the call of the model.
        domain (str): Domain of this prediction.
    """
    # Negative measurements are not recorded
    if memory >= 0:
        self.acp_memories.append(memory)
    if runtime >= 0:
        self.acp_runtimes.append(runtime)

    # The partial word contains a typo if it's not a prefix of the label
    if true_word.startswith(partial_word):
        has_typo = WITHOUT_TYPO
    else:
        has_typo = WITH_TYPO

    # Proportion of the word that was typed, rounded to 2 decimals
    completion_rate = round(len(partial_word) / len(true_word), 2)

    # Work out the outcome of the prediction
    is_top1 = len(predicted_words) > 0 and predicted_words[0] == true_word
    top3 = predicted_words[:3]
    is_top3 = true_word in top3

    # A word missing from the top-3 predictions is a mistake
    if not is_top3 and self.track_mistakes:
        self.acp_mistakes.update([Mistake(actual=true_word, preds=top3, context=f"{context}{partial_word}")])

    # Then update the counts of the right bucket
    counts = self.acp_c[domain][has_typo][completion_rate]
    if is_top1:
        counts.correct += 1
    if is_top3:
        counts.correct_3 += 1
    counts.total += 1

`acr(true_word, predicted_words, typed_word, context, typos, memory, runtime, domain=None)`

Method used to record a prediction for the auto-correction task.

Parameters:

Name	Type	Description	Default
`true_word`	`str`	The label (clean word to predict).	required
`predicted_words`	`List[str]`	Predictions of the model.	required
`typed_word`	`str`	The word typed, containing potential typos.	required
`context`	`str`	The context (previous words in the sentence).	required
`typos`	`List[Typo]`	List of typos introduced.	required
`memory`	`int`	Memory consumption for the call of the model.	required
`runtime`	`int`	Runtime for the call of the model.	required
`domain`	`str`	Domain of this prediction.	`None`

Source code in kebbie/scorer.py

def acr(
    self,
    true_word: str,
    predicted_words: List[str],
    typed_word: str,
    context: str,
    typos: List[Typo],
    memory: int,
    runtime: int,
    domain: Optional[str] = None,
) -> None:
    """Register the outcome of one auto-correction prediction.

    Args:
        true_word (str): The label (clean word to predict).
        predicted_words (List[str]): Predictions of the model.
        typed_word (str): The word typed, containing potential typos.
        context (str): The context (previous words in the sentence).
        typos (List[Typo]): List of typos introduced.
        memory (int): Memory consumption for the call of the model.
        runtime (int): Runtime for the call of the model.
        domain (str): Domain of this prediction.
    """
    # Negative measurements are not recorded
    if memory >= 0:
        self.acr_memories.append(memory)
    if runtime >= 0:
        self.acr_runtimes.append(runtime)

    # Bucket key : `None` without typo, the typo itself when there is a
    # single one, otherwise the number of typos
    if not typos:
        typo_type = None
    else:
        typo_type = typos[0] if len(typos) == 1 else len(typos)

    # Work out the outcome of the prediction
    is_top1 = len(predicted_words) > 0 and predicted_words[0] == true_word
    top3 = predicted_words[:3]
    is_top3 = true_word in top3

    # A word missing from the top-3 predictions is a mistake
    if not is_top3 and self.track_mistakes:
        self.acr_mistakes.update([Mistake(actual=true_word, preds=top3, context=f"{context}{typed_word}")])

    # Then update the counts of the right bucket
    counts = self.acr_c[domain][typo_type]
    if is_top1:
        counts.correct += 1
    if is_top3:
        counts.correct_3 += 1
    counts.total += 1

`swp(true_word, predicted_words, context, memory, runtime, domain=None)`

Method used to record a prediction for the swipe resolution task.

Parameters:

Name	Type	Description	Default
`true_word`	`str`	The label (clean word to predict).	required
`predicted_words`	`List[str]`	Predictions of the model.	required
`context`	`str`	The context (previous words in the sentence).	required
`memory`	`int`	Memory consumption for the call of the model.	required
`runtime`	`int`	Runtime for the call of the model.	required
`domain`	`str`	Domain of this prediction.	`None`

Source code in kebbie/scorer.py

def swp(
    self,
    true_word: str,
    predicted_words: List[str],
    context: str,
    memory: int,
    runtime: int,
    domain: Optional[str] = None,
) -> None:
    """Register the outcome of one swipe resolution prediction.

    Args:
        true_word (str): The label (clean word to predict).
        predicted_words (List[str]): Predictions of the model.
        context (str): The context (previous words in the sentence).
        memory (int): Memory consumption for the call of the model.
        runtime (int): Runtime for the call of the model.
        domain (str): Domain of this prediction.
    """
    # Negative measurements are not recorded
    if memory >= 0:
        self.swp_memories.append(memory)
    if runtime >= 0:
        self.swp_runtimes.append(runtime)

    # Work out the outcome of the prediction first
    is_top1 = len(predicted_words) > 0 and predicted_words[0] == true_word
    top3 = predicted_words[:3]
    is_top3 = true_word in top3

    # A word missing from the top-3 predictions is a mistake
    if not is_top3 and self.track_mistakes:
        self.swp_mistakes.update([Mistake(actual=true_word, preds=top3, context=context)])

    # Then update the counts of the domain
    counts = self.swp_c[domain]
    if is_top1:
        counts.correct += 1
    if is_top3:
        counts.correct_3 += 1
    counts.total += 1

`set_domain(domain)`

Method setting the domain for the scores associated with no domain.

To make it easier to score a single sentence, it's possible to call the scorer without a domain (see the signatures of nwp(), acp(), acr() and swp()). In this case the scores are associated with no domain (None key). This method allows the user to set the domain name for these scores with no domain (effectively moving the None domain scores to the given domain name).

Note

If some scores were already linked to the given domain, these scores will be erased (replaced by the scores of the None domain).

Parameters:

Name	Type	Description	Default
`domain`	`str`	Domain name to associate the scores to.	required

Source code in kebbie/scorer.py

def set_domain(self, domain: str) -> None:
    """Give a name to the scores that were recorded without a domain.

    To make it easier to score a single sentence, the recording methods can
    be called without a domain, in which case the counts are stored under
    the `None` key. This method moves the counts stored under the `None`
    key to the given domain name, for each of the count tables (`nwp_c`,
    `acp_c`, `acr_c`, `swp_c`).

    Note:
        If some scores were already linked to the given domain, these
        scores will be erased (replaced by the scores of the `None`
        domain).

    Args:
        domain (str): Domain name to associate the scores to.
    """
    for table_name in ("nwp_c", "acp_c", "acr_c", "swp_c"):
        table = getattr(self, table_name)
        if None not in table:
            # Nothing was recorded without a domain for this task
            continue
        table[domain] = table.pop(None)

`_score_accuracy(c)`

Helper method to compute the accuracy given a prediction count.

This method returns a dictionary with 3 metrics:

Accuracy
Top3 accuracy
Total number of predictions

Parameters:

Name	Type	Description	Default
`c`	`Count`	Count object to use to compute the accuracy.	required

Returns:

Type	Description
`Dict`	Dictionary with the computed metrics.

Source code in kebbie/scorer.py

def _score_accuracy(self, c: Count) -> Dict:
    """Helper method turning a prediction count into accuracy metrics.

    The returned dictionary contains 3 metrics :
     * `accuracy` : Accuracy
     * `top3_accuracy` : Top3 accuracy
     * `n` : Total number of predictions

    Both accuracies are 0 when the count holds no prediction.

    Args:
        c (Count): Count object to use to compute the accuracy.

    Returns:
        Dictionary with the computed metrics.
    """
    if c.total != 0:
        top1 = round_to_n(c.correct / c.total)
        top3 = round_to_n(c.correct_3 / c.total)
    else:
        # No prediction at all : avoid dividing by zero
        top1 = top3 = 0

    return {"accuracy": top1, "top3_accuracy": top3, "n": c.total}

`_score_precision_recall(no_typo_c, typo_c, beta)`

Helper method to compute the precision and recall for auto-correction.

This method returns a dictionary with several metrics:

Accuracy
Precision
Recall
F-score
Top3 accuracy
Top3 precision
Top3 recall
Top3 F-score
Number of predictions with a typo
Total number of predictions

For auto-correction, we need 2 Count objects : the counts of typos, and the counts of non-typo (to compute the True Negative and False Positive metrics).

Parameters:

Name	Type	Description	Default
`no_typo_c`	`Count`	Count object for the predictions where no typo were added.	required
`typo_c`	`Count`	Count object for the predictions where typos were added.	required
`beta`	`float`	Beta to use for computing the F-beta score.	required

Returns:

Type	Description
`Dict`	Dictionary with the computed metrics.

Source code in kebbie/scorer.py

def _score_precision_recall(self, no_typo_c: Count, typo_c: Count, beta: float) -> Dict:
    """Helper method to compute the precision and recall for
    auto-correction.

    The returned dictionary contains the following metrics :
     * Accuracy
     * Precision
     * Recall
     * F-score
     * Top3 accuracy
     * Top3 precision
     * Top3 recall
     * Top3 F-score
     * Number of predictions with a typo
     * Total number of predictions

    For auto-correction, 2 Count objects are needed : the counts of typos,
    and the counts of non-typo (to compute the True Negative and False
    Positive metrics).

    Args:
        no_typo_c (Count): Count object for the predictions where no typo
            were added.
        typo_c (Count): Count object for the predictions where typos were
            added.
        beta (float): Beta to use for computing the F-beta score.

    Returns:
        Dictionary with the computed metrics.
    """

    def metrics(no_typo_hits, typo_hits):
        # Split the counts into TN / FP (words without typo) and TP / FN
        # (words with typo), then derive the 4 rounded metrics
        tn, fp = no_typo_hits, no_typo_c.total - no_typo_hits
        tp, fn = typo_hits, typo_c.total - typo_hits

        p = precision(tp=tp, fp=fp)
        r = recall(tp=tp, fn=fn)

        return (
            round_to_n(accuracy(tp=tp, tn=tn, fp=fp, fn=fn)),
            round_to_n(p),
            round_to_n(r),
            round_to_n(fbeta(precision=p, recall=r, beta=beta)),
        )

    acc, prec, rec, fscore = metrics(no_typo_c.correct, typo_c.correct)
    acc_3, prec_3, rec_3, fscore_3 = metrics(no_typo_c.correct_3, typo_c.correct_3)

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "fscore": fscore,
        "top3_accuracy": acc_3,
        "top3_precision": prec_3,
        "top3_recall": rec_3,
        "top3_fscore": fscore_3,
        "n_typo": typo_c.total,
        "n": no_typo_c.total + typo_c.total,
    }

`_score_performances(memories, runtimes)`

Helper method to compute metrics related to the memory & runtime.

This method returns a dictionary with several metrics

The mean memory consumption
The min memory consumption
The max memory consumption
The mean running time
The fastest running time
The slowest running time

Parameters:

Name	Type	Description	Default
`memories`	`List[int]`	List of memory consumptions for a specific operation.	required
`runtimes`	`List[int]`	List of runtimes for a specific operation.	required

Returns:

Type	Description
`Dict`	Dictionary with the computed metrics.

Source code in kebbie/scorer.py

def _score_performances(self, memories: List[int], runtimes: List[int]) -> Dict:
    """Helper method to compute metrics related to the memory & runtime.

    This method returns a dictionary with several metrics :
     * The mean memory consumption
     * The min memory consumption
     * The max memory consumption
     * The mean running time
     * The fastest running time
     * The slowest running time

    Args:
        memories (List[int]): List of memories consumptions for a
            specific operation.
        runtimes (List[int]): List of runtimes for a specific operation.

    Returns:
        Dictionary with the computed metrics.
    """
    perf = {
        "mean_memory": stats.mean(memories) if memories else 0,
        "min_memory": min(memories) if memories else 0,
        "max_memory": max(memories) if memories else 0,
        "mean_runtime": stats.mean(runtimes) if runtimes else 0,
        "fastest_runtime": min(runtimes) if runtimes else 0,
        "slowest_runtime": max(runtimes) if runtimes else 0,
    }

    if self.human_readable:
        perf = {
            name: human_readable_memory(x) if name.endswith("memory") else human_readable_runtime(x)
            for name, x in perf.items()
        }

    return perf

`score(beta=DEFAULT_BETA)`

Method that computes the final scores (as well as some alternative metrics that can bring insight into the capabilities of the model), and outputs them in an organized dictionary.

Parameters:

Name	Type	Description	Default
`beta`	`float`	Beta to use for computing the F-beta score.	`DEFAULT_BETA`

Returns:

Type	Description
`Dict`	Dictionary containing the computed scores and metrics for the model tested.

Source code in kebbie/scorer.py

def score(self, beta: float = DEFAULT_BETA) -> Dict:  # noqa: C901
    """Method that computes the final scores (as well as some alternative
    metrics that can bring insight into the capabilities of the model), and
    outputs them in an organized dictionary.

    The returned dictionary has one entry per task (`next_word_prediction`,
    `auto_completion`, `auto_correction`, `swipe_resolution`). Each task
    entry holds the task `score`, a `per_domain` breakdown and the
    `performances` (memory & runtime). Auto-completion additionally has
    `per_completion_rate` and `per_other` breakdowns, and auto-correction
    has `per_typo_type` and `per_number_of_typos` breakdowns. Finally, the
    `overall_score` entry is computed by `one_score()` from these results.

    Args:
        beta (float, optional): Beta to use for computing the F-beta score.

    Returns:
        Dictionary containing the computed scores and metrics for the
        model tested.
    """
    # --- Next-word prediction ---
    # Group scores by domain
    per = defaultdict(Count)
    for domain, c in self.nwp_c.items():
        per[domain] += c
    # Sum the per-domain counts (starting from an empty Count) for the task score
    total_c = sum(per.values(), Count())
    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

    # Task results
    nwp = {
        "score": self._score_accuracy(total_c),
        "per_domain": per_domain,
        "performances": self._score_performances(self.nwp_memories, self.nwp_runtimes),
    }

    # --- Auto-completion ---
    # The counts are nested on 3 levels here : domain -> has_typo ->
    # completion rate -> Count. Each grouping below walks the whole structure
    # and accumulates the counts on a single one of these keys.
    # Group scores by domain
    per = defaultdict(Count)
    for domain, d1 in self.acp_c.items():
        for has_typo, d2 in d1.items():
            for compl_rate, c in d2.items():
                per[domain] += c
    total_c = sum(per.values(), Count())
    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

    # Group scores by completion rate
    per = defaultdict(Count)
    for domain, d1 in self.acp_c.items():
        for has_typo, d2 in d1.items():
            for compl_rate, c in d2.items():
                per[compl_rate] += c
    # The completion rates are then bucketed in 4 ranges (the keys are
    # compared against 0.25 / 0.5 / 0.75)
    per_compl_rate = {
        "<25%": self._score_accuracy(sum((c for k, c in per.items() if k < 0.25), Count())),
        "25%~50%": self._score_accuracy(sum((c for k, c in per.items() if 0.25 <= k < 0.5), Count())),
        "50%~75%": self._score_accuracy(sum((c for k, c in per.items() if 0.5 <= k < 0.75), Count())),
        ">75%": self._score_accuracy(sum((c for k, c in per.items() if 0.75 <= k), Count())),
    }

    # Group scores by with_typo / without_typo
    per = defaultdict(Count)
    for domain, d1 in self.acp_c.items():
        for has_typo, d2 in d1.items():
            for compl_rate, c in d2.items():
                per[has_typo] += c
    # Both keys are always reported : indexing the defaultdict with a key that
    # was never seen gives a fresh `Count()`
    per_other = {k: self._score_accuracy(per[k]) for k in [WITHOUT_TYPO, WITH_TYPO]}

    # Task results
    acp = {
        "score": self._score_accuracy(total_c),
        "per_domain": per_domain,
        "per_completion_rate": per_compl_rate,
        "per_other": per_other,
        "performances": self._score_performances(self.acp_memories, self.acp_runtimes),
    }

    # --- Auto-correction ---
    # The counts are nested on 2 levels here : domain -> typo -> Count, where
    # the `None` typo key is treated as the "no typo" counts.
    # Group scores by domain
    no_typo_per, typo_per = defaultdict(Count), defaultdict(Count)
    for domain, d1 in self.acr_c.items():
        for typo, c in d1.items():
            if typo is None:
                no_typo_per[domain] += c
            else:
                typo_per[domain] += c
    no_typo_total_c = sum(no_typo_per.values(), Count())
    typo_total_c = sum(typo_per.values(), Count())
    # NOTE(review): only the domains present in `no_typo_per` are reported, so
    # a domain with typo counts but no "no typo" count is absent from
    # `per_domain` - confirm this is intended.
    per_domain = {k: self._score_precision_recall(no_typo_per[k], typo_per[k], beta=beta) for k in no_typo_per}

    # Group scores by typo type
    # (`typo_per` is rebound here : from now on it is keyed by typo, not by
    # domain. `typo_total_c` still holds the total computed above.)
    no_typo_c, typo_per = Count(), defaultdict(Count)
    for domain, d1 in self.acr_c.items():
        for typo, c in d1.items():
            if typo is None:
                no_typo_c += c
            else:
                typo_per[typo] += c
    # Divide the total count of no-typo into each type of typos with the right proportions
    # NOTE(review): this divides by `typo_total_c.total` for each typo key, so
    # it would raise a ZeroDivisionError if typo keys exist while their total
    # is 0 - presumably never the case, verify against how counts are recorded.
    no_typo_per = defaultdict(Count, {k: no_typo_c * (c.total / typo_total_c.total) for k, c in typo_per.items()})
    # One entry per member of `Typo` (presumably an Enum, given the use of
    # `.name`) ; missing keys give a fresh `Count()`
    per_typo_type = {t.name: self._score_precision_recall(no_typo_per[t], typo_per[t], beta=beta) for t in Typo}
    # Keys being a `Typo` instance go in the "1" bucket, the integer key 2
    # in the "2" bucket, and the integer keys above 2 in the "3+" bucket
    per_n_typo = {
        "1": self._score_precision_recall(
            sum((c for k, c in no_typo_per.items() if isinstance(k, Typo)), Count()),
            sum((c for k, c in typo_per.items() if isinstance(k, Typo)), Count()),
            beta=beta,
        ),
        "2": self._score_precision_recall(no_typo_per[2], typo_per[2], beta=beta),
        "3+": self._score_precision_recall(
            sum((c for k, c in no_typo_per.items() if isinstance(k, int) and k > 2), Count()),
            sum((c for k, c in typo_per.items() if isinstance(k, int) and k > 2), Count()),
            beta=beta,
        ),
    }

    # Task results
    acr = {
        "score": self._score_precision_recall(no_typo_total_c, typo_total_c, beta=beta),
        "per_domain": per_domain,
        "per_typo_type": per_typo_type,
        "per_number_of_typos": per_n_typo,
        "performances": self._score_performances(self.acr_memories, self.acr_runtimes),
    }

    # --- Swipe resolution ---
    # Group scores by domain
    per = defaultdict(Count)
    for domain, c in self.swp_c.items():
        per[domain] += c
    total_c = sum(per.values(), Count())
    per_domain = {k: self._score_accuracy(c) for k, c in per.items()}

    # Task results
    swp = {
        "score": self._score_accuracy(total_c),
        "per_domain": per_domain,
        "performances": self._score_performances(self.swp_memories, self.swp_runtimes),
    }

    # Final results
    results = {
        "next_word_prediction": nwp,
        "auto_completion": acp,
        "auto_correction": acr,
        "swipe_resolution": swp,
    }

    # Add the overall score
    results["overall_score"] = one_score(results)

    return results

`dd_x_layers(n_layers=1)`

Helper function for creating a nested defaultdict, with a specified number of nesting levels. The innermost values are Count objects.

Parameters:

Name	Type	Description	Default
`n_layers`	`int`	Number of layer for the defaultdict.	`1`

Returns:

Type	Description
`defaultdict`	Created nested defaultdict.

Source code in kebbie/scorer.py

def dd_x_layers(n_layers: int = 1) -> defaultdict:
    """Helper function building a defaultdict nested on `n_layers` levels,
    where the values of the innermost level are `Count` objects.

    Args:
        n_layers (int): Number of layer for the defaultdict.

    Returns:
        Created nested defaultdict.
    """
    assert n_layers > 0, f"A default dict have at least 1 layer ({n_layers} given)"

    # The last layer creates `Count` objects, any other layer creates a
    # defaultdict with one layer less
    factory = Count if n_layers == 1 else partial(dd_x_layers, n_layers=n_layers - 1)
    return defaultdict(factory)

`one_score(results)`

One Score to rule them all, One Score to find them, One Score to bring them all and in the darkness bind them.

This function is here to gather the various testing metrics of a JET file in a single number, to easily compare models.

We take a single metric for each task, and weight them based on the importance of the task (these metrics already have the same scale : between 0 and 1).

For NWP and ACP we take a top-3 metric, because these tasks usually involve a user action from a proposed list. For ACR and SWP, we take a top-1 metric, since usually it's automatically applied without user input.

Parameters:

Name	Type	Description	Default
`results`	`Dict`	Testing results. Should be a dictionary containing all the metrics (used to compute the one score).	required

Returns:

Type	Description
`float`	One score, computed from the results given.

Source code in kebbie/scorer.py

def one_score(results: Dict) -> float:
    """One Score to rule them all, One Score to find them, One Score to bring
    them all and in the darkness bind them.

    Condense the various testing metrics of a JET file into a single number,
    so that models can easily be compared.

    A single metric is picked for each task, and these metrics are combined
    with a weight reflecting the importance of the task (the metrics already
    share the same scale : between 0 and 1).

    NWP and ACP use a top-3 metric, because these tasks usually involve a
    user action from a proposed list. ACR and SWP use a top-1 metric, since
    the result is usually applied automatically, without user input.

    Args:
        results (Dict): Testing results. Should be a dictionary containing all
            the metrics (used to compute the one score).

    Returns:
        One score, computed from the results given.
    """

    def task_metric(task, metric):
        return results[task]["score"][metric]

    nwp_top3 = task_metric("next_word_prediction", "top3_accuracy")
    acp_top3 = task_metric("auto_completion", "top3_accuracy")
    acr_fscore = task_metric("auto_correction", "fscore")
    swp_top1 = task_metric("swipe_resolution", "accuracy")

    # The weights sum to 1
    return 0.15 * nwp_top3 + 0.2 * acp_top3 + 0.4 * acr_fscore + 0.25 * swp_top1

`tokenizer.py`

Module defining BasicTokenizer, very basic tokenizer to separate a sentence into words.

`BasicTokenizer`

A basic tokenizer, used for regular Latin-script languages. This tokenizer simply uses spaces as word separators. Since it is used for testing only, we don't need to care about punctuation, etc.

Source code in kebbie/tokenizer.py

class BasicTokenizer:
    """A basic tokenizer, used for regular latin languages.
    Words are simply separated on spaces. Since this tokenizer is used for
    testing only, punctuation doesn't need any careful handling.
    """

    # Characters that are not on the keyboard, mapped to their counterpart
    # that can be typed
    _LOOKALIKES = str.maketrans(
        {
            # Things that are like "
            "“": '"',
            "”": '"',
            "„": '"',
            # Things that are like '
            "’": "'",
            "ʻ": "'",
            "‘": "'",
            "´": "'",
            "ʼ": "'",
            # Things that are like -
            "–": "-",
            "—": "-",
            "‑": "-",
            "−": "-",
            "ー": "-",
            # Other punctuations
            "…": "...",
            "‚": ",",
            "․": ".",
        }
    )

    def preprocess(self, sentence: str) -> str:
        """Method for simple preprocessing.

        This is not meant to be an extensive and clean preprocessing : it
        only normalizes some characters (that are not in our keyboard, so
        the user can't officially type them) into their normal counterpart
        (that are in the keyboard), and then removes the punctuation.

        Args:
            sentence (str): String to normalize.

        Returns:
            Normalized string.
        """
        # None of the replacement strings contains a character that is itself
        # replaced, so a single pass is enough
        normalized = sentence.translate(self._LOOKALIKES)

        # TODO: Each keyboard has its own way to deal with punctuation
        # (applying auto-correction or not, displaying next-word prediction or
        # not, etc...). So for now we just get rid of the punctuations, it's a
        # convenient shortcut and it's fair to all keyboards.
        # Eventually we should find a better way to deal with that.
        without_dots = re.sub(r"\s*\.+\s*", " ", normalized)
        return re.sub(r"\s*[,:;\(\)\"!?\[\]\{\}~]\s*", " ", without_dots)

    def word_split(self, sentence: str) -> List[str]:
        """Method cutting a sentence into its words.

        Args:
            sentence (str): Sentence to split.

        Returns:
            List of words from the sentence.
        """
        trimmed = sentence.strip()
        return trimmed.split()

    def update_context(self, context: str, word: str) -> str:
        """Method extending a context with a word that was typed.

        Args:
            context (str): Existing context.
            word (str): Word being typed.

        Returns:
            Updated context.
        """
        # The typed word is followed by a space, ready for the next word
        return "".join((context, word, " "))

`preprocess(sentence)`

Method for simple preprocessing.

The goal of this function is not to provide an extensive and clean preprocessing. The goal is just to normalize some characters (that are not in our keyboard, so the user can't officially type them) into their normal counterpart, that are in the keyboard.

Parameters:

Name	Type	Description	Default
`sentence`	`str`	String to normalize.	required

Returns:

Type	Description
`str`	Normalized string.

Source code in kebbie/tokenizer.py

def preprocess(self, sentence: str) -> str:
    """Method for simple preprocessing.

    This is not meant to be an extensive and clean preprocessing : it only
    normalizes some characters (that are not in our keyboard, so the user
    can't officially type them) into their normal counterpart (that are in
    the keyboard), and then removes the punctuation.

    Args:
        sentence (str): String to normalize.

    Returns:
        Normalized string.
    """
    lookalikes = (
        # Things that are like "
        ("“", '"'),
        ("”", '"'),
        ("„", '"'),
        # Things that are like '
        ("’", "'"),
        ("ʻ", "'"),
        ("‘", "'"),
        ("´", "'"),
        ("ʼ", "'"),
        # Things that are like -
        ("–", "-"),
        ("—", "-"),
        ("‑", "-"),
        ("−", "-"),
        ("ー", "-"),
        # Other punctuations
        ("…", "..."),
        ("‚", ","),
        ("․", "."),
    )
    for variant, typeable in lookalikes:
        sentence = sentence.replace(variant, typeable)

    # TODO: Each keyboard has its own way to deal with punctuation
    # (applying auto-correction or not, displaying next-word prediction or
    # not, etc...). So for now we just get rid of the punctuations, it's a
    # convenient shortcut and it's fair to all keyboards.
    # Eventually we should find a better way to deal with that.
    punctuation_patterns = (r"\s*\.+\s*", r"\s*[,:;\(\)\"!?\[\]\{\}~]\s*")
    for pattern in punctuation_patterns:
        sentence = re.sub(pattern, " ", sentence)

    return sentence

`word_split(sentence)`

Method for splitting a sentence into a list of words.

Parameters:

Name	Type	Description	Default
`sentence`	`str`	Sentence to split.	required

Returns:

Type	Description
`List[str]`	List of words from the sentence.

Source code in kebbie/tokenizer.py

def word_split(self, sentence: str) -> List[str]:
    """Method cutting a sentence into its words.

    Args:
        sentence (str): Sentence to split.

    Returns:
        List of words from the sentence.
    """
    trimmed = sentence.strip()
    return trimmed.split()

`update_context(context, word)`

Method for updating a context, given a word that was typed.

Parameters:

Name	Type	Description	Default
`context`	`str`	Existing context.	required
`word`	`str`	Word being typed.	required

Returns:

Type	Description
`str`	Updated context.

Source code in kebbie/tokenizer.py

def update_context(self, context: str, word: str) -> str:
    """Method extending a context with a word that was typed.

    Args:
        context (str): Existing context.
        word (str): Word being typed.

    Returns:
        Updated context.
    """
    # The typed word is followed by a space, ready for the next word
    return "".join((context, word, " "))

`utils.py`

Various utility functions used by kebbie.

`profile_fn(fn, *args, **kwargs)`

Profile the runtime and memory usage of the given function.

Note that it will only account for memory allocated by python (if you use a library in C/C++ that does its own allocation, it won't report it).

Parameters:

Name	Type	Description	Default
`fn`	`Callable`	Function to profile.	required
`*args`	`Any`	Positional arguments to pass to the given function.	`()`
`**kwargs`	`Any`	Keywords arguments to pass to the given function.	`{}`

Returns:

Type	Description
`Any`	The return value of the function called.
`int`	The memory usage (in bytes).
`int`	The runtime (in nano seconds).

Source code in kebbie/utils.py

def profile_fn(fn: Callable, *args: Any, **kwargs: Any) -> Tuple[Any, int, int]:
    """Profile the runtime and memory usage of the given function.

    Note that it will only account for memory allocated by python (if you use
    a library in C/C++ that does its own allocation, it won't report it).

    The memory tracing started for the measurement is stopped before
    returning (even if `fn` raises), so that each call is measured on its
    own and the tracing overhead doesn't leak into the rest of the program.
    If memory tracing was already enabled by the caller, it is left untouched
    (in this case the reported peak may include allocations made before this
    call).

    Args:
        fn (Callable): Function to profile.
        *args: Positional arguments to pass to the given function.
        **kwargs: Keywords arguments to pass to the given function.

    Returns:
        The return value of the function called.
        The memory usage (in bytes), i.e. the peak size of the traced memory
        blocks.
        The runtime (in nano seconds).
    """
    # Only take ownership of the tracing if nobody else started it, to never
    # stop a tracing the caller relies on
    owns_tracing = not tracemalloc.is_tracing()
    if owns_tracing:
        tracemalloc.start()

    try:
        # Monotonic clock with an integer nanosecond resolution : the measure
        # can't be negative or skewed by a system clock adjustment
        t0 = time.perf_counter_ns()

        result = fn(*args, **kwargs)

        runtime = time.perf_counter_ns() - t0
        _, memory = tracemalloc.get_traced_memory()
    finally:
        if owns_tracing:
            tracemalloc.stop()

    return result, memory, runtime

`euclidian_dist(p1, p2)`

Function computing the euclidian distance between 2 points.

Parameters:

Name	Type	Description	Default
`p1`	`Tuple[float, float]`	Point 1.	required
`p2`	`Tuple[float, float]`	Point 2.	required

Returns:

Type	Description
`float`	Euclidian distance between the 2 given points.

Source code in kebbie/utils.py

def euclidian_dist(p1: Tuple[float, float], p2: Tuple[float, float]) -> float:
    """Compute the euclidian distance separating two points.

    Args:
        p1 (Tuple[float, float]): First point.
        p2 (Tuple[float, float]): Second point.

    Returns:
        The euclidian distance between `p1` and `p2`.
    """
    # Squared difference along each coordinate, paired by position
    squared_diffs = [(c1 - c2) ** 2 for c1, c2 in zip(p1, p2)]
    return math.sqrt(sum(squared_diffs))

`load_keyboard(lang='en-US')`

Load the keyboard data for the given language.

For now, only en-US is supported.

Parameters:

Name	Type	Description	Default
`lang`	`str`	Language of the keyboard to load.	`'en-US'`

Returns:

Type	Description
`Dict`	The keyboard data.

Source code in kebbie/utils.py

def load_keyboard(lang: str = "en-US") -> Dict:
    """Load the keyboard data for the given language.

    The data is read from the file `layouts/<lang>.json`, located next to
    this module.

    For now, only `en-US` is supported.

    Args:
        lang (str, optional): Language of the keyboard to load.

    Returns:
        The keyboard data.
    """
    layout_folder = Path(__file__).parent / "layouts"
    # Explicit encoding : without it, `open()` uses the locale's preferred
    # encoding, which is not UTF-8 on every platform
    with open(layout_folder / f"{lang}.json", "r", encoding="utf-8") as f:
        keyboard = json.load(f)
    return keyboard

`strip_accents(s)`

Util function for removing accents from a given string.

Parameters:

Name	Type	Description	Default
`s`	`str`	Accented string.	required

Returns:

Type	Description
`str`	Same string, without accent.

Source code in kebbie/utils.py

def strip_accents(s: str) -> str:
    """Remove the accents from the given string.

    The string is decomposed with the NFKD normalization form, then every
    combining character is dropped.

    Args:
        s (str): Accented string.

    Returns:
        Same string, without accent.
    """
    kept_chars = []
    for char in unicodedata.normalize("NFKD", s):
        # Combining characters have a non-zero combining class
        if unicodedata.combining(char) == 0:
            kept_chars.append(char)
    return "".join(kept_chars)

`sample(proba)`

Simple function to sample an event with the given probability. For example, calling sample(0.95) will return True in 95% cases, and False in 5% cases.

Parameters:

Name	Type	Description	Default
`proba`	`float`	Probability of the event to happen. Should be between 0 and 1 (included).	required

Returns:

Type	Description
`bool`	`True` if the event was sampled, `False` otherwise.

Source code in kebbie/utils.py

def sample(proba: float) -> bool:
    """Draw a boolean event that happens with the given probability.
    For example, `sample(0.95)` returns `True` in 95% cases, and `False` in
    the remaining 5% cases.

    Args:
        proba (float): Probability of the event to happen. Should be between 0
            and 1 (included).

    Returns:
        `True` if the event was sampled, `False` otherwise.
    """
    assert 0 <= proba <= 1, f"`{proba}` is not a valid probability (should be between 0 and 1)"

    # Certain outcomes : no need to draw anything
    if proba == 0:
        return False
    if proba == 1:
        return True

    (outcome,) = random.choices([True, False], weights=[proba, 1 - proba])
    return outcome

`sample_among(probs, with_none=True)`

Function that samples one event among several, each with its own probability.

Parameters:

Name	Type	Description	Default
`probs`	`Dict[Any, float]`	Dictionary representing the different events and their probabilities. Each probability should be above 0 and their sum should not exceed 1.	required
`with_none`	`bool`	If set to `True`, add a `None` option (no event sampled).	`True`

Returns:

Type	Description
`Any`	The corresponding key of the event sampled.

Source code in kebbie/utils.py

def sample_among(probs: Dict[Any, float], with_none: bool = True) -> Any:
    """Sample one event among several, each one having its own probability.

    Args:
        probs (Dict[Any, float]): Dictionary representing the different events
            and their probabilities. Each probability should be non-negative
            and their sum should not exceed 1.
        with_none (bool): If set to `True`, add a `None` option (no event
            sampled), weighted with the remaining probability mass.

    Returns:
        The corresponding key of the event sampled.
    """
    events = list(probs)
    event_weights = list(probs.values())
    assert (
        all(w >= 0 for w in event_weights) and sum(event_weights) <= 1
    ), "The numbers given are not a probability (should be above 0 and their sum should not exceed 1)"

    if with_none:
        # The "nothing happens" event takes whatever probability is left
        events = events + [None]
        event_weights = event_weights + [1 - sum(event_weights)]

    (chosen,) = random.choices(events, weights=event_weights)
    return chosen

`sample_partial_word(keystrokes, word, true_word)`

Sample a partial word from a given word, and extract the corresponding keystrokes as well.

Sampling is done with increasing weights (more chances to sample a longer list). For example, if the list represents the keystrokes of "abcdef", the probabilities are as follows:

* "a" : 1/15
* "ab" : 2/15
* "abc" : 3/15
* "abcd" : 4/15
* "abcde" : 5/15

Parameters:

Name	Type	Description	Default
`keystrokes`	`List[Optional[Tuple[float, float]]]`	Complete list of keystrokes, representing a full word.	required
`word`	`str`	The word corresponding to the keystrokes.	required
`true_word`	`str`	Actual word (without typo). Necessary to ensure the sampled keystrokes are partial.	required

Returns:

Type	Description
`List[Optional[Tuple[float, float]]]`	The partial list of keystrokes (sampled from the given word).
`str`	The partial word (sampled from the given word).

Source code in kebbie/utils.py

def sample_partial_word(
    keystrokes: List[Optional[Tuple[float, float]]], word: str, true_word: str
) -> Tuple[List[Optional[Tuple[float, float]]], str]:
    """Draw a prefix of the given word, along with the matching prefix of
    its keystrokes.

    The prefix length is drawn with weights proportional to the length
    itself (longer prefixes are more likely). For example if the list
    represents the keystrokes of "abcdef", the probabilities are as follows:
     * "a" :     1/15
     * "ab" :    2/15
     * "abc" :   3/15
     * "abcd" :  4/15
     * "abcde" : 5/15

    Args:
        keystrokes (List[Optional[Tuple[float, float]]]): Complete list of
            keystrokes, representing a full word.
        word (str): The word corresponding to the keystrokes.
        true_word (str): Actual word (without typo). Necessary to ensure the
            sampled keystrokes are partial.

    Returns:
        The partial list of keystrokes (sampled from the given word).
        The partial word (sampled from the given word).
    """
    # The prefix must be strictly shorter than both the typed and true word
    shortest = min(len(true_word), len(word))
    candidate_lengths = range(1, shortest)

    # Each length is weighted by itself
    cut = random.choices(candidate_lengths, weights=candidate_lengths)[0]
    partial_keystrokes = keystrokes[:cut]
    partial_word = word[:cut]
    return partial_keystrokes, partial_word

`accuracy(tp, tn, fp, fn)`

Function computing the accuracy.

Parameters:

Name	Type	Description	Default
`tp`	`int`	Number of True Positive.	required
`tn`	`int`	Number of True Negative.	required
`fp`	`int`	Number of False Positive.	required
`fn`	`int`	Number of False Negative.	required

Returns:

Type	Description
`float`	Accuracy.

Source code in kebbie/utils.py

def accuracy(tp: int, tn: int, fp: int, fn: int) -> float:
    """Function computing the accuracy, i.e. the proportion of correct
    outcomes (true positives and true negatives) among all outcomes.

    Args:
        tp (int): Number of True Positive.
        tn (int): Number of True Negative.
        fp (int): Number of False Positive.
        fn (int): Number of False Negative.

    Returns:
        Accuracy, or 0 if all the counts sum to zero.
    """
    total = tp + tn + fp + fn
    if total == 0:
        # Nothing was counted : the ratio is undefined, fall back to 0
        return 0
    return (tp + tn) / total

`precision(tp, fp)`

Function computing the precision.

Parameters:

Name	Type	Description	Default
`tp`	`int`	Number of True Positive.	required
`fp`	`int`	Number of False Positive.	required

Returns:

Type	Description
`float`	Precision.

Source code in kebbie/utils.py

def precision(tp: int, fp: int) -> float:
    """Function computing the precision, i.e. the proportion of true
    positives among everything predicted as positive.

    Args:
        tp (int): Number of True Positive.
        fp (int): Number of False Positive.

    Returns:
        Precision, or 0 if there is no positive prediction at all.
    """
    predicted_positive = tp + fp
    if predicted_positive == 0:
        # Undefined ratio, fall back to 0
        return 0
    return tp / predicted_positive

`recall(tp, fn)`

Function computing the recall.

Parameters:

Name	Type	Description	Default
`tp`	`int`	Number of True Positive.	required
`fn`	`int`	Number of False Negative.	required

Returns:

Type	Description
`float`	Recall.

Source code in kebbie/utils.py

def recall(tp: int, fn: int) -> float:
    """Function computing the recall, i.e. the proportion of true positives
    among everything that is actually positive.

    Args:
        tp (int): Number of True Positive.
        fn (int): Number of False Negative.

    Returns:
        Recall, or 0 if there is no actual positive at all.
    """
    actual_positive = tp + fn
    if actual_positive == 0:
        # Undefined ratio, fall back to 0
        return 0
    return tp / actual_positive

`fbeta(precision, recall, beta=1)`

Function computing the F-beta score (which is a generalization of the F1 score).

The value of beta changes how much we weight recall versus precision:

* For beta=0.5, precision is twice as important as recall.
* For beta=2, recall is twice as important as precision.

Parameters:

Name	Type	Description	Default
`precision`	`float`	Precision.	required
`recall`	`float`	Recall.	required
`beta`	`float`	Beta factor.	`1`

Returns:

Type	Description
`float`	F-beta score.

Source code in kebbie/utils.py

def fbeta(precision: float, recall: float, beta: float = 1) -> float:
    """Function computing the F-beta score (a generalization of the F1
    score, which corresponds to beta=1).

    The value of beta changes how much we weight recall versus precision:
     * For beta=0.5, Precision is twice as important as Recall
     * For beta=2, Recall is twice as important as Precision

    Args:
        precision (float): Precision.
        recall (float): Recall.
        beta (float): Beta factor.

    Returns:
        F-beta score, or 0 if the score is undefined (null denominator).
    """
    denominator = beta**2 * precision + recall
    if denominator == 0:
        # Undefined score, fall back to 0
        return 0
    return (1 + beta**2) * precision * recall / denominator

`round_to_n(x, n=2)`

Util function to round a given number to n significant digits.

Parameters:

Name	Type	Description	Default
`x`	`float`	Number to round.	required
`n`	`int`	Number of significant digits to use.	`2`

Returns:

Type	Description
`float`	Rounded number.

Source code in kebbie/utils.py

def round_to_n(x: float, n: int = 2) -> float:
    """Util function to round a given number to n significant digits.

    Negative numbers are supported : the number of significant digits is
    determined from the absolute value.

    Args:
        x (float): Number to round.
        n (int): Number of significant digits to use.

    Returns:
        Rounded number (0 if the given number is 0).
    """
    if x == 0:
        # log10(0) is undefined, and there is nothing to round anyway
        return 0

    # Position of the most significant digit. Use the absolute value, since
    # `math.log10()` raises a ValueError for negative numbers.
    magnitude = math.floor(math.log10(abs(x)))
    return round(x, (n - 1) - magnitude)

`human_readable_memory(x)`

Given a number in bytes, return a human-readable string of this number, with the right unit.

Parameters:

Name	Type	Description	Default
`x`	`int`	Number in bytes.	required

Returns:

Type	Description
`str`	Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py

def human_readable_memory(x: int) -> str:
    """Format a number of bytes as a human-readable string, picking the unit
    (from B to TB) that fits the number.

    The number is first rounded to 3 significant digits, and units are
    multiples of 1000.

    Args:
        x (int): Number in bytes.

    Returns:
        Human-readable version of the given number, with the right unit.
    """
    value = round_to_n(x, n=3)
    for unit in ("B", "KB", "MB", "GB"):
        if value < 1000:
            break
        value /= 1000
    else:
        # Too big for every unit above : express it in the largest one
        unit = "TB"
    return f"{value:g} {unit}"

`human_readable_runtime(x)`

Given a number in nanoseconds, return a human-readable string of this number, with the right unit.

Parameters:

Name	Type	Description	Default
`x`	`int`	Number in nanoseconds.	required

Returns:

Type	Description
`str`	Human-readable version of the given number, with the right unit.

Source code in kebbie/utils.py

def human_readable_runtime(x: int) -> str:
    """Format a number of nanoseconds as a human-readable string, picking
    the unit (from ns to s) that fits the number.

    The number is first rounded to 3 significant digits.

    Args:
        x (int): Number in nanoseconds.

    Returns:
        Human-readable version of the given number, with the right unit.
    """
    value = round_to_n(x, n=3)
    for unit in ("ns", "μs", "ms"):
        if value < 1000:
            break
        value /= 1000
    else:
        # Too big for every unit above : express it in seconds
        unit = "s"
    return f"{value:g} {unit}"

`get_soda_dataset(max_sentences=2000, seed=31)`

Load the SODA dataset.

Parameters:

Name	Type	Description	Default
`max_sentences`	`int`	Maximum number of sentences in total in the dataset. They will be shared across domain (50% from the `narrative` domain, 50% from the `dialogue` domain).	`2000`
`seed`	`int`	Seed to use when shuffling the dataset (since we don't use the whole dataset, it's better to shuffle it before extracting the X first sentences).	`31`

Returns:

Type	Description
`Dict[str, List[str]]`	The dataset, separated into two domains : narrative and dialogue.

Source code in kebbie/utils.py

def get_soda_dataset(max_sentences: int = 2_000, seed: int = 31) -> Dict[str, List[str]]:
    """Load the SODA dataset.

    The `test` split of `allenai/soda` is loaded and shuffled, then
    sentences are collected until each domain has reached its quota.

    Args:
        max_sentences (int, optional): Maximum number of sentences in total in
            the dataset. They will be shared across domain (50% from the
            `narrative` domain, 50% from the `dialogue` domain).
        seed (int, optional): Seed to use when shuffling the dataset (since we
            don't use the whole dataset, it's better to shuffle it before
            extracting the X first sentences).

    Returns:
        The dataset, separated into two domains : narrative and dialogue.
    """
    quota = max_sentences // 2
    narratives = []
    dialogues = []

    shuffled = datasets.load_dataset("allenai/soda", split="test").shuffle(seed=seed)

    for entry in shuffled:
        # Stop as soon as both domains are full
        if len(narratives) >= quota and len(dialogues) >= quota:
            break

        if len(narratives) < quota:
            narratives.append(entry["narrative"])

        # A single entry holds several dialogue sentences : take as many as
        # the quota allows
        for sentence in entry["dialogue"]:
            if len(dialogues) >= quota:
                break
            dialogues.append(sentence)

    return {"narrative": narratives, "dialogue": dialogues}

Constants

`__init__.py`

`SUPPORTED_LANG = ['en-US']`

`N_MOST_COMMON_MISTAKES = 1000`

`DEFAULT_SEED = 42`

`emulator.py`

`ANDROID = 'android'`

`IOS = 'ios'`

`GBOARD = 'gboard'`

`TAPPA = 'tappa'`

`FLEKSY = 'fleksy'`

`KBKITPRO = 'kbkitpro'`

`KBKITOSS = 'kbkitoss'`

`SWIFTKEY = 'swiftkey'`

`YANDEX = 'yandex'`

`KEYBOARD_PACKAGE = {GBOARD: 'com.google.android.inputmethod.latin', SWIFTKEY: 'com.touchtype.swiftkey', YANDEX: 'ru.yandex.androidkeyboard', TAPPA: 'com.tappa.keyboard'}`

`ANDROID_CAPABILITIES = {'platformName': 'android', 'automationName': 'UiAutomator2', 'enableMultiWindows': True, 'deviceName': 'test', 'newCommandTimeout': 3600}`

`IOS_CAPABILITIES = {'platformName': 'iOS', 'automationName': 'XCUITest', 'udid': 'auto', 'xcodeOrgId': '8556JTA4X4', 'xcodeSigningId': 'iPhone Developer', 'useNewWDA': False, 'usePrebuiltWdDA': True, 'startIWDP': True, 'bundleId': 'com.apple.MobileSMS', 'newCommandTimeout': 3600}`

`BROWSER_PAD_URL = 'https://www.justnotepad.com'`

`ANDROID_TYPING_FIELD_CLASS_NAME = 'android.widget.EditText'`

`DUMMY_RECIPIENT = '0'`

`IOS_TYPING_FIELD_ID = 'messageBodyField'`

`IOS_START_CHAT_CLASS_NAME = 'XCUIElementTypeCell'`

`TESSERACT_CONFIG = '-c tessedit_char_blacklist=0123456789”:!@·$%&/()=.¿?'`

`PREDICTION_DELAY = 0.4`

CONTENT_TO_IGNORE = ['Sticker', 'GIF', 'Clipboard', 'Settings', 'Back', 'Switch input method', 'Paste item', 'Close', 'paintpalette', 'Search Document', 'Microphone', 'gearshape', 'Next Locale', 'paintpalette', 'EmojiCategories/smileysAndPeople', 'EmojiCategories/animalsAndNature', 'EmojiCategories/foodAndDrink', 'EmojiCategories/activity', 'EmojiCategories/travelAndPlaces', 'EmojiCategories/objects', 'EmojiCategories/symbols', 'EmojiCategories/flags', 'Add', 'And', 'Are', '“A”', '🚀', 'Switch language.']

CONTENT_TO_RENAME = {'Shift': 'shift', 'Delete': 'backspace', 'Backspace': 'backspace', 'Space': 'spacebar', 'space': 'spacebar', 'Space.': 'spacebar', 'Emoji button': 'smiley', 'Emoji': 'smiley', 'Keyboard Type - emojis': 'smiley', 'Search': 'enter', 'return': 'enter', 'Enter': 'enter', 'Delete.': 'backspace', 'To symbols.': 'numbers', 'Return.': 'enter', 'Symbol keyboard': 'numbers', 'Symbols': 'numbers', 'Symbols and numbers': 'numbers', 'Keyboard Type - numeric': 'numbers', 'Voice input': 'mic', ',, alternatives available, Voice typing, long press to activate': 'mic', 'Close features menu': 'magic', 'Open features menu': 'magic', 'underline': '_', '&': '&', 'ampersand': '&', 'Dash': '-', 'Plus': '+', 'Left parenthesis': '(', 'Right parenthesis': ')', 'slash': '/', 'Apostrophe': "'", 'Colon': ':', 'Semicolon': ';', 'Exclamation': '!', 'Question mark': '?', 'Letter keyboard': 'letters', 'Letters': 'letters', 'Keyboard Type - auto': 'letters', 'To letters.': 'letters', 'Digit keyboard': 'numbers', 'More symbols': 'shift', 'Keyboard Type - symbolic': 'shift', 'Double tap for uppercase': 'shift', 'Double tap for caps lock': 'shift', 'Uppercase key.': 'shift', 'Additional symbols.': 'shift', 'capital Q': 'Q', 'capital W': 'W', 'capital E': 'E', 'capital R': 'R', 'capital T': 'T', 'capital Y': 'Y', 'capital U': 'U', 'capital I': 'I', 'Capital I': 'I', 'capital O': 'O', 'capital P': 'P', 'capital A': 'A', 'capital S': 'S', 'capital D': 'D', 'capital F': 'F', 'capital G': 'G', 'capital H': 'H', 'capital J': 'J', 'capital K': 'K', 'capital L': 'L', 'capital Z': 'Z', 'capital X': 'X', 'capital C': 'C', 'capital V': 'V', 'capital B': 'B', 'capital N': 'N', 'capital M': 'M'}

FLEKSY_LAYOUT = {'keyboard_frame': [0, 517, 393, 266], 'lowercase': {'q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'w': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'e': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'r': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 't': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'u': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'i': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'o': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'p': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'a': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 's': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'd': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'f': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'g': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'h': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'j': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'k': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'l': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'x': [0.25555555555555554, 0.5994520547945206, 
0.08796296296296297, 0.1643835616438356], 'c': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'v': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'b': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'n': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'm': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'uppercase': {'Q': [0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'W': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'E': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'R': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'T': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'Y': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'U': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'I': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'O': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'P': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], 'A': [0.05740740740740741, 0.4008219178082192, 0.08796296296296297, 
0.1643835616438356], 'S': [0.15555555555555556, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'D': [0.25555555555555554, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'F': [0.35462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'G': [0.4546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'H': [0.5546296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'J': [0.6546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'K': [0.7546296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'L': [0.8555555555555555, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'Z': [0.15555555555555556, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'X': [0.25555555555555554, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'C': [0.35462962962962963, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'V': [0.4546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'B': [0.5546296296296296, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'N': [0.6546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'M': [0.7546296296296297, 0.5994520547945206, 0.08796296296296297, 0.1643835616438356], 'backspace': [0.8555555555555555, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'numbers': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}, 'numbers': {'1': 
[0.007407407407407408, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '2': [0.10462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '3': [0.20462962962962963, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '4': [0.30462962962962964, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '5': [0.4046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '6': [0.5046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '7': [0.6046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '8': [0.7046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '9': [0.8046296296296296, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '0': [0.9046296296296297, 0.19356164383561644, 0.08796296296296297, 0.1643835616438356], '-': [0.007407407407407408, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '/': [0.10462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ':': [0.20462962962962963, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ';': [0.30462962962962964, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '(': [0.4046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], ')': [0.5046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '$': [0.6046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '&': [0.7046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '@': [0.8046296296296296, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], '"': [0.9046296296296297, 0.4008219178082192, 0.08796296296296297, 0.1643835616438356], 'shift': [0.007407407407407408, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], ',': [0.3101851851851852, 0.5994520547945206, 0.12, 0.1643835616438356], '?': [0.44044444444444447, 0.5994520547945206, 0.12, 
0.1643835616438356], '!': [0.5707037037037037, 0.5994520547945206, 0.12, 0.1643835616438356], "'": [0.705962962962963, 0.5994520547945206, 0.12, 0.1643835616438356], 'backspace': [0.8551851851851852, 0.5994520547945206, 0.1361111111111111, 0.1643835616438356], 'letters': [0.007407407407407408, 0.8080821917808219, 0.125, 0.1643835616438356], 'smiley': [0.14351851851851852, 0.8080821917808219, 0.10277777777777777, 0.1643835616438356], 'spacebar': [0.25555555555555554, 0.8080821917808219, 0.48703703703703705, 0.1643835616438356], '.': [0.7546296296296297, 0.8080821917808219, 0.1, 0.1643835616438356], 'enter': [0.8648148148148148, 0.8080821917808219, 0.12962962962962962, 0.1643835616438356]}}

`gesture.py`

`MAX_RADIUS = 16`

`MIN_N_POINTS_PER_DIST = 0.1`

`MAX_N_POINTS_PER_DIST = 0.25`

`MIN_ACCELERATION = 0.2`

`MAX_ACCELERATION = 0.5`

`layout.py`

`SPACE = 'spacebar'`

`POINT = '.'`

`N_ACCENT_PER_LINE = 4`

`noise_model.py`

`DEFAULT_TYPO_PROBS = {Typo.TRANSPOSE_CHAR: 0.01, Typo.DELETE_SPELLING_SYMBOL: 0.1, Typo.ADD_SPELLING_SYMBOL: 0, Typo.DELETE_SPACE: 0.01, Typo.ADD_SPACE: 0, Typo.DELETE_PUNCTUATION: 0, Typo.ADD_PUNCTUATION: 0, Typo.DELETE_CHAR: 0.005, Typo.ADD_CHAR: 0.005, Typo.SIMPLIFY_ACCENT: 0.08, Typo.SIMPLIFY_CASE: 0.08, Typo.COMMON_TYPO: 0.05}`

Internals

cmd.py

instantiate_correctors(keyboard, get_layout=True, fast_mode=True, instantiate_emulator=True)

common_args(parser)

cli()

correctors.py

EmulatorCorrector

__reduce__()

cached_type(context, word)

auto_correct(context, keystrokes, word)

auto_complete(context, keystrokes, partial_word)

predict_next_word(context)

emulator.py

Emulator

get_android_devices()

select_keyboard(keyboard)

get_ios_devices()

paste(text)

type_characters(characters)

get_predictions(lang='en')

get_text()

show_keyboards()

LayoutDetector

get_suggestions()

GboardLayoutDetector

get_suggestions()

IosLayoutDetector

get_suggestions()

KbkitproLayoutDetector

get_suggestions()

KbkitossLayoutDetector

get_suggestions()

SwiftkeyLayoutDetector

get_suggestions()

YandexLayoutDetector

get_suggestions()

TappaLayoutDetector

get_suggestions()

FleksyLayoutDetector

get_suggestions()

gesture.py

make_swipe_gesture(control_points)

random_point_around(p, radius)

bernstein_poly(i, n, t)

bezier_curve(control_points, linspace)

accelerated_linspace(n, acceleration)

layout.py

KeyInfo dataclass

Key dataclass

LayoutHelper

_extract_infos(keyboard_layout, ignore_layers_after=None)

_make_virtual_key(idx, initial_bounds)

get_key_info(char)

get_key(pos, klayer_id)

noise_model.py

Typo

NoiseModel

type_till_space(words)

swipe(word)

_introduce_typos(word, error_free=False)

_fuzzy_type(word, error_free=False)

_is_correctable(word)

_get_common_typos()

oracle.py

Oracle

test(corrector, n_proc, seed)

init_tester(fn, lang, custom_keyboard, correctors, seed, track_mistakes)

tester(sentence)

scorer.py

Count dataclass

__add__(count)

__mul__(proportion)

Mistake dataclass

Scorer

add(scorer)

nwp(true_word, predicted_words, context, memory, runtime, domain=None)

acp(true_word, predicted_words, partial_word, context, memory, runtime, domain=None)

acr(true_word, predicted_words, typed_word, context, typos, memory, runtime, domain=None)

swp(true_word, predicted_words, context, memory, runtime, domain=None)

set_domain(domain)

`cmd.py`

`instantiate_correctors(keyboard, get_layout=True, fast_mode=True, instantiate_emulator=True)`

`common_args(parser)`

`cli()`

`correctors.py`

`EmulatorCorrector`

`__reduce__()`

`cached_type(context, word)`

`auto_correct(context, keystrokes, word)`

`auto_complete(context, keystrokes, partial_word)`

`predict_next_word(context)`

`emulator.py`

`Emulator`

`get_android_devices()`

`select_keyboard(keyboard)`

`get_ios_devices()`

`paste(text)`

`type_characters(characters)`

`get_predictions(lang='en')`

`get_text()`

`show_keyboards()`

`LayoutDetector`

`get_suggestions()`

`GboardLayoutDetector`

`get_suggestions()`

`IosLayoutDetector`

`get_suggestions()`

`KbkitproLayoutDetector`

`get_suggestions()`

`KbkitossLayoutDetector`

`get_suggestions()`

`SwiftkeyLayoutDetector`

`get_suggestions()`

`YandexLayoutDetector`

`get_suggestions()`

`TappaLayoutDetector`

`get_suggestions()`

`FleksyLayoutDetector`

`get_suggestions()`

`gesture.py`

`make_swipe_gesture(control_points)`

`random_point_around(p, radius)`

`bernstein_poly(i, n, t)`

`bezier_curve(control_points, linspace)`

`accelerated_linspace(n, acceleration)`

`layout.py`

`KeyInfo` `dataclass`

`Key` `dataclass`

`LayoutHelper`

`_extract_infos(keyboard_layout, ignore_layers_after=None)`

`_make_virtual_key(idx, initial_bounds)`

`get_key_info(char)`

`get_key(pos, klayer_id)`

`noise_model.py`

`Typo`

`NoiseModel`

`type_till_space(words)`

`swipe(word)`

`_introduce_typos(word, error_free=False)`

`_fuzzy_type(word, error_free=False)`

`_is_correctable(word)`

`_get_common_typos()`

`oracle.py`

`Oracle`

`test(corrector, n_proc, seed)`

`init_tester(fn, lang, custom_keyboard, correctors, seed, track_mistakes)`

`tester(sentence)`

`scorer.py`

`Count` `dataclass`

`__add__(count)`

`__mul__(proportion)`

`Mistake` `dataclass`

`Scorer`

`add(scorer)`

`nwp(true_word, predicted_words, context, memory, runtime, domain=None)`

`acp(true_word, predicted_words, partial_word, context, memory, runtime, domain=None)`

`acr(true_word, predicted_words, typed_word, context, typos, memory, runtime, domain=None)`

`swp(true_word, predicted_words, context, memory, runtime, domain=None)`

`set_domain(domain)`

`_score_accuracy(c)`