Reference Table Utilities¶

Functions to join descriptive names to estimation results.

Auto-Enhancement¶

PyFIA automatically adds descriptive names for common grouping columns:

Grouping Column	Auto-Added Column
`FORTYPCD`	`FOREST_TYPE_GROUP`
`OWNGRPCD`	`OWNERSHIP_GROUP`

import pyfia

db = pyfia.FIA("georgia.duckdb")
db.clip_by_state("GA")

# Forest type names added automatically!
result = pyfia.area(db, grp_by="FORTYPCD")
print(result.columns)
# ['YEAR', 'FORTYPCD', 'FOREST_TYPE_GROUP', 'AREA', ...]

# Ownership names added automatically!
result = pyfia.volume(db, grp_by="OWNGRPCD")
print(result.columns)
# ['YEAR', 'OWNGRPCD', 'OWNERSHIP_GROUP', 'VOLCFNET_TOTAL', ...]

Manual Reference Table Joins¶

For columns that aren't auto-enhanced (like species), use these functions:

# Get volume by species
result = pyfia.volume(db, grp_by="SPCD")

# Add species names manually
result = pyfia.join_species_names(result, db)
print(result)

Functions¶

join_species_names¶

Add common and scientific species names.

join_species_names ¶

join_species_names(data: DataFrame, db: str | Path | FIA, species_col: str = 'SPCD', common_name_col: str = 'COMMON_NAME', scientific_name_col: str | None = 'SCIENTIFIC_NAME', include_scientific: bool = False) -> DataFrame

Join species names from REF_SPECIES table.

PARAMETER	DESCRIPTION
`data`	DataFrame containing species codes TYPE: `DataFrame`
`db`	FIA database object or path to database TYPE: `FIA or str`
`species_col`	Column name containing species codes TYPE: `str` DEFAULT: `"SPCD"`
`common_name_col`	Name for the joined common name column TYPE: `str` DEFAULT: `"COMMON_NAME"`
`scientific_name_col`	Name for the joined scientific name column TYPE: `str` DEFAULT: `'SCIENTIFIC_NAME'`
`include_scientific`	Whether to include scientific names TYPE: `bool` DEFAULT: `False`

RETURNS	DESCRIPTION
`DataFrame`	Original data with species names added

Examples:

>>> results = tpa(db, bySpecies=True)
>>> results_with_names = join_species_names(results, db)

Source code in src/pyfia/utils/reference_tables.py

def join_species_names(
    data: pl.DataFrame,
    db: str | Path | FIA,
    species_col: str = "SPCD",
    common_name_col: str = "COMMON_NAME",
    scientific_name_col: str | None = "SCIENTIFIC_NAME",
    include_scientific: bool = False,
) -> pl.DataFrame:
    """
    Join species names from REF_SPECIES table.

    Parameters
    ----------
    data : pl.DataFrame
        DataFrame containing species codes
    db : FIA or str
        FIA database object or path to database
    species_col : str, default "SPCD"
        Column name containing species codes
    common_name_col : str, default "COMMON_NAME"
        Name for the joined common name column
    scientific_name_col : str, optional
        Name for the joined scientific name column
    include_scientific : bool, default False
        Whether to include scientific names

    Returns
    -------
    pl.DataFrame
        Original data with species names added

    Examples
    --------
    >>> results = tpa(db, bySpecies=True)
    >>> results_with_names = join_species_names(results, db)
    """
    if isinstance(db, (str, Path)):
        db = FIA(db)

    if species_col not in data.columns:
        return data

    # Determine columns to read
    ref_cols = ["SPCD", "COMMON_NAME"]
    if include_scientific:
        ref_cols.append("SCIENTIFIC_NAME")

    # Read species reference table
    species = db._reader.read_table("REF_SPECIES", columns=ref_cols)

    if hasattr(species, "collect"):
        species = species.collect()

    # Cast SPCD to Int64 for consistent joining
    species = species.with_columns(pl.col("SPCD").cast(pl.Int64))

    # Rename columns
    rename_map = {"SPCD": species_col, "COMMON_NAME": common_name_col}
    if include_scientific and scientific_name_col:
        rename_map["SCIENTIFIC_NAME"] = scientific_name_col

    species = species.rename(rename_map).unique(species_col)

    # Cast the join column in data to match
    data = data.with_columns(pl.col(species_col).cast(pl.Int64))

    return data.join(species, on=species_col, how="left")

Example:

# Basic usage
result = pyfia.join_species_names(volume_data, db)

# Include scientific name
result = pyfia.join_species_names(
    volume_data,
    db,
    include_scientific=True
)

join_forest_type_names¶

Add forest type group names.

join_forest_type_names ¶

join_forest_type_names(data: DataFrame, db: str | Path | FIA, forest_type_col: str = 'FORTYPCD', name_col: str = 'FOREST_TYPE_NAME') -> DataFrame

Join forest type names from REF_FOREST_TYPE table.

PARAMETER	DESCRIPTION
`data`	DataFrame containing forest type codes TYPE: `DataFrame`
`db`	FIA database object or path to database TYPE: `FIA or str`
`forest_type_col`	Column name containing forest type codes TYPE: `str` DEFAULT: `"FORTYPCD"`
`name_col`	Name for the joined forest type name column TYPE: `str` DEFAULT: `"FOREST_TYPE_NAME"`

RETURNS	DESCRIPTION
`DataFrame`	Original data with forest type names added

Examples:

>>> results = area(db, grp_by=['FORTYPCD'], totals=True)
>>> results_with_names = join_forest_type_names(results, db)

Source code in src/pyfia/utils/reference_tables.py

def join_forest_type_names(
    data: pl.DataFrame,
    db: str | Path | FIA,
    forest_type_col: str = "FORTYPCD",
    name_col: str = "FOREST_TYPE_NAME",
) -> pl.DataFrame:
    """
    Join forest type names from REF_FOREST_TYPE table.

    Parameters
    ----------
    data : pl.DataFrame
        DataFrame containing forest type codes
    db : FIA or str
        FIA database object or path to database
    forest_type_col : str, default "FORTYPCD"
        Column name containing forest type codes
    name_col : str, default "FOREST_TYPE_NAME"
        Name for the joined forest type name column

    Returns
    -------
    pl.DataFrame
        Original data with forest type names added

    Examples
    --------
    >>> results = area(db, grp_by=['FORTYPCD'], totals=True)
    >>> results_with_names = join_forest_type_names(results, db)
    """
    # Ensure we have a FIA object
    if isinstance(db, (str, Path)):
        db = FIA(db)

    # Check if column exists
    if forest_type_col not in data.columns:
        return data

    # Read forest type reference table
    forest_types = db._reader.read_table(
        "REF_FOREST_TYPE", columns=["VALUE", "MEANING"]
    )

    # Handle lazy frames
    if hasattr(forest_types, "collect"):
        forest_types = forest_types.collect()

    # Prepare reference data
    forest_types = (
        forest_types.with_columns(pl.col("VALUE").cast(pl.Int64))
        .rename({"VALUE": forest_type_col, "MEANING": name_col})
        .unique(forest_type_col)
    )

    # Join with original data
    return data.join(forest_types, on=forest_type_col, how="left")

Example:

area_by_type = pyfia.area(db, grp_by="FORTYPGRPCD")
result = pyfia.join_forest_type_names(area_by_type, db)
print(result)

join_state_names¶

Add state names and abbreviations.

join_state_names ¶

join_state_names(data: DataFrame, db: str | Path | FIA, state_col: str = 'STATECD', state_name_col: str = 'STATE_NAME', state_abbr_col: str | None = 'STATE_ABBR', include_abbr: bool = True) -> DataFrame

Join state names and abbreviations from REF_STATE.

PARAMETER	DESCRIPTION
`data`	DataFrame containing state codes TYPE: `DataFrame`
`db`	FIA database object or path to database TYPE: `FIA or str`
`state_col`	Column name containing state codes TYPE: `str` DEFAULT: `"STATECD"`
`state_name_col`	Name for the joined state name column TYPE: `str` DEFAULT: `"STATE_NAME"`
`state_abbr_col`	Name for the joined state abbreviation column TYPE: `str` DEFAULT: `'STATE_ABBR'`
`include_abbr`	Whether to include state abbreviations TYPE: `bool` DEFAULT: `True`

RETURNS	DESCRIPTION
`DataFrame`	Original data with state names added

Source code in src/pyfia/utils/reference_tables.py

def join_state_names(
    data: pl.DataFrame,
    db: str | Path | FIA,
    state_col: str = "STATECD",
    state_name_col: str = "STATE_NAME",
    state_abbr_col: str | None = "STATE_ABBR",
    include_abbr: bool = True,
) -> pl.DataFrame:
    """
    Join state names and abbreviations from REF_STATE.

    Parameters
    ----------
    data : pl.DataFrame
        DataFrame containing state codes
    db : FIA or str
        FIA database object or path to database
    state_col : str, default "STATECD"
        Column name containing state codes
    state_name_col : str, default "STATE_NAME"
        Name for the joined state name column
    state_abbr_col : str, optional
        Name for the joined state abbreviation column
    include_abbr : bool, default True
        Whether to include state abbreviations

    Returns
    -------
    pl.DataFrame
        Original data with state names added
    """
    if isinstance(db, (str, Path)):
        db = FIA(db)

    if state_col not in data.columns:
        return data

    # Determine columns to read
    ref_cols = ["VALUE", "MEANING"]
    if include_abbr:
        ref_cols.append("ABBR")

    # Read state reference table
    states = db._reader.read_table("REF_STATE", columns=ref_cols)

    if hasattr(states, "collect"):
        states = states.collect()

    # Prepare reference data
    states = states.with_columns(pl.col("VALUE").cast(pl.Int64))

    # Rename columns
    rename_map = {"VALUE": state_col, "MEANING": state_name_col}
    if include_abbr and state_abbr_col:
        rename_map["ABBR"] = state_abbr_col

    states = states.rename(rename_map).unique(state_col)

    return data.join(states, on=state_col, how="left")

Example:

volume_by_state = pyfia.volume(db, grp_by="STATECD")
result = pyfia.join_state_names(volume_by_state, db)
print(result)

join_multiple_references¶

Join multiple reference tables at once.

join_multiple_references ¶

join_multiple_references(data: DataFrame, db: str | Path | FIA, forest_type: bool = False, species: bool = False, state: bool = False, **kwargs) -> DataFrame

Join multiple reference tables at once.

PARAMETER	DESCRIPTION
`data`	DataFrame to join reference tables to TYPE: `DataFrame`
`db`	FIA database object or path to database TYPE: `FIA or str`
`forest_type`	Whether to join forest type names TYPE: `bool` DEFAULT: `False`
`species`	Whether to join species names TYPE: `bool` DEFAULT: `False`
`state`	Whether to join state names TYPE: `bool` DEFAULT: `False`
`**kwargs`	Additional arguments passed to individual join functions DEFAULT: `{}`

RETURNS	DESCRIPTION
`DataFrame`	Data with requested reference tables joined

Examples:

>>> results = area(db, grp_by=['STATECD', 'FORTYPCD'], totals=True)
>>> results_with_names = join_multiple_references(
...     results, db,
...     forest_type=True,
...     state=True
... )

Source code in src/pyfia/utils/reference_tables.py

def join_multiple_references(
    data: pl.DataFrame,
    db: str | Path | FIA,
    forest_type: bool = False,
    species: bool = False,
    state: bool = False,
    **kwargs,
) -> pl.DataFrame:
    """
    Join multiple reference tables at once.

    Parameters
    ----------
    data : pl.DataFrame
        DataFrame to join reference tables to
    db : FIA or str
        FIA database object or path to database
    forest_type : bool, default False
        Whether to join forest type names
    species : bool, default False
        Whether to join species names
    state : bool, default False
        Whether to join state names
    **kwargs
        Additional arguments passed to individual join functions

    Returns
    -------
    pl.DataFrame
        Data with requested reference tables joined

    Examples
    --------
    >>> results = area(db, grp_by=['STATECD', 'FORTYPCD'], totals=True)
    >>> results_with_names = join_multiple_references(
    ...     results, db,
    ...     forest_type=True,
    ...     state=True
    ... )
    """
    if forest_type:
        data = join_forest_type_names(data, db, **kwargs)

    if species:
        data = join_species_names(data, db, **kwargs)

    if state:
        data = join_state_names(data, db, **kwargs)

    return data

Example:

# Add both species and forest type names
result = pyfia.join_multiple_references(
    data,
    db,
    tables=["species", "forest_type"]
)

Available Reference Tables¶

Code Column	Reference Table	Name Column
`SPCD`	`REF_SPECIES`	Common name, Scientific name
`FORTYPCD`	`REF_FOREST_TYPE`	Forest type name
`FORTYPGRPCD`	`REF_FOREST_TYPE_GROUP`	Forest type group name
`STATECD`	`REF_STATE`	State name, abbreviation
`OWNGRPCD`	`REF_OWNGRPCD`	Ownership group name