Building Multilingual Data Science Teams

Should we use R or Python? Yes.

Language Wars!

The Reality

Almost everything that used to make Python awesome that wasn’t in R has since been ported over to R.

And everything that used to make R awesome that wasn’t in Python has since been ported over to Python.

Why Have a Multilingual Team?


  • Larger potential talent pool
  • More tools in the toolbox
  • Dual offering

The Key to a Great Multilingual Data Science Team?

How Empathy Manifests Technically


🌍 Environment management

📦 Package choices

📝 Documentation

How Empathy Manifests Technically

Environment Management

Environment Management


“But it works on my machine”

Environment Management


  1. Operating System
  2. Python and/or R version
  3. Package versions

Environment Management

Operating System & Python / R Version

Third-Party Tools

  • Posit Workbench

Open Source Tools

  • Containers (Docker / Podman)
  • Nix

Package Version

Python

  • uv, venv, pipenv, etc.

R

  • {renv}
  • {packrat}

Package Choices

Package Choices

Package Choices: dplyr & polars

library(dplyr)

df <-
    mtcars |>
        filter(
            mpg > 20
        ) |>
        select(wt) |>
        arrange(desc(wt)) |>
        head(5)
#

import polars as pl

df = (
    mtcars
    .filter(
        (pl.col("mpg") > 20)
    )
    .select("wt")
    .sort("wt", descending=True)
    .head(5)
)

Package Choices: dplyr & polars

library(dplyr)

df <-
    mtcars |>
        filter(
            mpg > 20
        ) |>
        select(wt) |>
        arrange(desc(wt)) |>
        head(5)
#
import polars as pl

df = (
    mtcars
    .filter(
        (pl.col("mpg") > 20)
    )
    .select("wt")
    .sort("wt", descending=True)
    .head(5)
)

Package Choices: dplyr & polars

library(dplyr)

df <-
    mtcars |>
        filter(
            mpg > 20
        ) |>
        select(wt) |>
        arrange(desc(wt)) |>
        head(5)
#
import polars as pl

df = (
    mtcars
    .filter(
        (pl.col("mpg") > 20)
    )
    .select("wt")
    .sort("wt", descending=True)
    .head(5)
)

Package Choices: dplyr & polars

library(dplyr)

df <-
    mtcars |>
        filter(
            mpg > 20
        ) |>
        select(wt) |>
        arrange(desc(wt)) |>
        head(5)
#
import polars as pl

df = (
    mtcars
    .filter(
        (pl.col("mpg") > 20)
    )
    .select("wt")
    .sort("wt", descending=True)
    .head(5)
)

Package Choices: dplyr & polars

library(dplyr)

df <-
    mtcars |>
        filter(
            mpg > 20
        ) |>
        select(wt) |>
        arrange(desc(wt)) |>
        head(5)
#
import polars as pl

df = (
    mtcars
    .filter(
        (pl.col("mpg") > 20)
    )
    .select("wt")
    .sort("wt", descending=True)
    .head(5)
)

Package Choices: DuckDB

Package Choices: DuckDB

library(duckdb)

# Connect to DuckDB (in-memory db)
conn <- dbConnect(duckdb::duckdb())

# Define SQL query
query <- "
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"

# Execute query and get results
result <- dbGetQuery(conn, query)
import duckdb

# Connect to DuckDB (in-memory db)
conn = duckdb.connect()

# Define SQL query
query = """
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"""

# Execute query and get results
result = conn.execute(query).df()

Package Choices: DuckDB

library(duckdb)

# Connect to DuckDB (in-memory db)
conn <- dbConnect(duckdb::duckdb())

# Define SQL query
query <- "
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"

# Execute query and get results
result <- dbGetQuery(conn, query)
import duckdb

# Connect to DuckDB (in-memory db)
conn = duckdb.connect()

# Define SQL query
query = """
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"""

# Execute query and get results
result = conn.execute(query).df()

Package Choices: DuckDB

library(duckdb)

# Connect to DuckDB (in-memory db)
conn <- dbConnect(duckdb::duckdb())

# Define SQL query
query <- "
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"

# Execute query and get results
result <- dbGetQuery(conn, query)
import duckdb

# Connect to DuckDB (in-memory db)
conn = duckdb.connect()

# Define SQL query
query = """
    SELECT wt
    FROM mtcars
    WHERE mpg > 20
    ORDER BY wt DESC
    LIMIT 5
"""

# Execute query and get results
result = conn.execute(query).df()

Package Choices: ggplot2 & plotnine

library(ggplot2)


ggplot(
  mtcars,
  aes(x = wt, y = mpg)
) +
  geom_point(color = "red") +
  geom_smooth(method = "lm")
#
# Import exactly the plotnine names used below; a bare `from plotnine import`
# with nothing after it is a syntax error.
from plotnine import ggplot, aes, geom_point, geom_smooth

# Scatter plot of wt vs. mpg with a linear-model trend line.
# Column names are passed to aes() as strings here.
# NOTE(review): `mtcars` is assumed to be a data frame defined elsewhere.
(
  ggplot(
    mtcars,
    aes(x = "wt", y = "mpg")
  ) +
    geom_point(color = "red") +
    geom_smooth(method = "lm")
)

Package Choices: gt & great-tables

library(gt)

int_cols <- c(
  "cyl", "vs", "am",
  "gear", "carb"
)


gt(mtcars) |>
  tab_header(
    title = "Awesome mtcars",
    subtitle = "With 💙 + GT"
  ) |>
  fmt_number() |>
  fmt_integer(columns = int_cols)
#
from great_tables import GT

# Columns formatted as integers by the fmt_integer() step below.
int_cols = [
  "cyl", "vs", "am",
  "gear", "carb"
]

# Build the table with a method chain. Wrapping the chain in parentheses lets
# each `.method()` sit on its own line; Python has no `|>` pipe operator.
# NOTE(review): `mtcars` is assumed to be a data frame defined elsewhere.
(
  GT(mtcars)
  .tab_header(
      title = "Awesome mtcars",
      subtitle = "With 💙 + GT"
  )
  .fmt_number()
  .fmt_integer(columns=int_cols)
)

Documentation

Documentation

Documentation: Function Documentation Syntax

round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Function Documentation Syntax

#' Round a number *up* to a
#' certain number of digits.
#'
#' @param x (double) The value
#'   to be rounded.
#' @param dig (int) The
#'   number of digits to round
#'   to.
#'
#' @return The rounded number.
#'
#' @examples
#' # This returns `2.15`
#' round_up(2.141, 2)
round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    """
    Round a number *up* to a
    certain number of digits.

    Parameters
    ----------
    x : float
        The value to be
        rounded.
    dig : int
        The number of digits to
        round to.

    Returns
    -------
    float
        The rounded number.

    Examples
    --------
    >>> round_up(2.141, 2)
    2.15
    """
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Function Documentation Syntax

#' Round a number *up* to a
#' certain number of digits.
#'
#' @param x (double) The value
#'   to be rounded.
#' @param dig (int) The
#'   number of digits to round
#'   to.
#'
#' @return The rounded number.
#'
#' @examples
#' # This returns `2.15`
#' round_up(2.141, 2)
round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    """
    Round a number *up* to a
    certain number of digits.

    Parameters
    ----------
    x : float
        The value to be
        rounded.
    dig : int
        The number of digits to
        round to.

    Returns
    -------
    float
        The rounded number.

    Examples
    --------
    >>> round_up(2.141, 2)
    2.15
    """
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Function Documentation Syntax

#' Round a number *up* to a
#' certain number of digits.
#'
#' @param x (double) The value
#'   to be rounded.
#' @param dig (int) The
#'   number of digits to round
#'   to.
#'
#' @return The rounded number.
#'
#' @examples
#' # This returns `2.15`
#' round_up(2.141, 2)
round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    """
    Round a number *up* to a
    certain number of digits.

    Parameters
    ----------
    x : float
        The value to be
        rounded.
    dig : int
        The number of digits to
        round to.

    Returns
    -------
    float
        The rounded number.

    Examples
    --------
    >>> round_up(2.141, 2)
    2.15
    """
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Function Documentation Syntax

#' Round a number *up* to a
#' certain number of digits.
#'
#' @param x (double) The value
#'   to be rounded.
#' @param dig (int) The
#'   number of digits to round
#'   to.
#'
#' @return The rounded number.
#'
#' @examples
#' # This returns `2.15`
#' round_up(2.141, 2)
round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    """
    Round a number *up* to a
    certain number of digits.

    Parameters
    ----------
    x : float
        The value to be
        rounded.
    dig : int
        The number of digits to
        round to.

    Returns
    -------
    float
        The rounded number.

    Examples
    --------
    >>> round_up(2.141, 2)
    2.15
    """
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Function Documentation Syntax

#' Round a number *up* to a
#' certain number of digits.
#'
#' @param x (double) The value
#'   to be rounded.
#' @param dig (int) The
#'   number of digits to round
#'   to.
#'
#' @return The rounded number.
#'
#' @examples
#' # This returns `2.15`
#' round_up(2.141, 2)
round_up <- function(x, dig) {
    f <- 10 ** dig
    out <- ceiling(x * f) / f
    return(out)
}
def round_up(x, dig):
    """
    Round a number *up* to a
    certain number of digits.

    Parameters
    ----------
    x : float
        The value to be
        rounded.
    dig : int
        The number of digits to
        round to.

    Returns
    -------
    float
        The rounded number.

    Examples
    --------
    >>> round_up(2.141, 2)
    2.15
    """
    f = 10 ** dig
    out = np.ceil(x * f) / f
    return out

Documentation: Issues & PRs


Documentation: Issues


Anatomy of a Good Issue

Overview

An overview of the issue or proposed enhancement, including rationale.


Reproducible Example

Some code that someone else can run to reproduce the issue or show the current shortfall that the proposed enhancement will overcome.


Potential Solution(s)

Discussion (possibly including code) regarding possible solution(s).

Documentation: Issues

Documentation: Pull Requests


Anatomy of a Good Pull Request

Overview

The purpose of the pull request and the associated Issue(s) it addresses.


Details

The technical aspects of how the Issue was addressed in code, as well as any design decisions that were made along the way and/or hurdles that were overcome.


How to Test

Provide instructions and code that the reviewer can run to see the impact of your changes.

Documentation: Pull Requests

Final Thoughts

Get In Touch With Me


ketchbrook.com


in/michaeljthomas2


@mike-thomas.bsky.social


mthomas-ketchbrook | ketchbrookanalytics