Source code for dopo.activity_filter

"""
This module contains functions to filter activities from a database based on
a YAML file with filter specifications.
"""

from pathlib import Path
from typing import Optional, Union

import bw2data
import yaml

# Sector filter functions from premise
# ---------------------------------------------------


[docs] def _act_fltr( database: list, fltr: [str, list, dict] = None, mask: [str, list, dict] = None, ): """Filter `database` for activities_list matching field contents given by `fltr` excluding strings in `mask`. `fltr`: string, list of strings or dictionary. If a string is provided, it is used to match the name field from the start (*startswith*). If a list is provided, all strings in the lists are used and dataframes_dict are joined (*or*). A dict can be given in the form <fieldname>: <str> to filter for <str> in <fieldname>. `mask`: used in the same way as `fltr`, but filters add up with each other (*and*). `filter_exact` and `mask_exact`: boolean, set `True` to only allow for exact matches. :param database: A lice cycle inventory database :type database: brightway2 database object :param fltr: value(s) to filter with. :type fltr: Union[str, lst, dict] :param mask: value(s) to filter with. :type mask: Union[str, lst, dict] :return: list of activity data set names :rtype: list """ if fltr is None: fltr = {} if mask is None: mask = {} # default field is name if isinstance(fltr, (list, str)): fltr = {"name": fltr} if isinstance(mask, (list, str)): mask = {"name": mask} assert len(fltr) > 0, "Filter dict must not be empty." # find `act` in `database` that match `fltr` # and do not match `mask` filters = database for field, value in fltr.items(): if isinstance(value, list): for val in value: filters = [a for a in filters if val in a[field]] else: filters = [a for a in filters if value in a[field]] if mask: for field, value in mask.items(): if isinstance(value, list): for val in value: filters = [f for f in filters if val not in f[field]] else: filters = [f for f in filters if value not in f[field]] return filters
def _name_prefilter_terms(filtr: dict):
    """
    Collect the name substrings that can be used to pre-filter the database.

    :param filtr: filter specifications, one entry per technology
    :type filtr: dict
    :return: list of name substrings, or ``None`` if at least one entry
        does not constrain the ``"name"`` field, in which case a
        name-based pre-filter would wrongly discard its matches
    :rtype: list or None
    """
    terms = []
    for entry in filtr.values():
        spec = entry.get("fltr")
        if spec is None:
            # no filter given; _act_fltr reports this entry later on
            continue
        if isinstance(spec, dict):
            if "name" not in spec:
                return None
            spec = spec["name"]
        # a bare string is one term, not a sequence of characters
        entry_terms = spec if isinstance(spec, list) else [spec]
        if not entry_terms:
            return None
        terms.extend(entry_terms)
    return terms


def generate_sets_from_filters(filtr: dict, database: bw2data.Database) -> dict:
    """
    Generate a dictionary with sets of activities for technologies from
    the filter specifications.

    Each value of `filtr` is a dictionary with a ``"fltr"`` entry and an
    optional ``"mask"`` entry, both in the forms accepted by `_act_fltr`.

    :param filtr: filter specifications, keyed by technology
    :type filtr: dict
    :param database: A life cycle inventory database
    :type database: brightway2 database object
    :return: A dictionary mapping each technology to the set of matching
        activities (the objects yielded by `database`, which therefore
        have to be hashable)
    :rtype: dict
    :raises AssertionError: if an entry has no or an empty ``"fltr"``.
    """
    terms = _name_prefilter_terms(filtr)

    if terms is None:
        # some entry filters on other fields only: every activity is a candidate
        subset = list(database)
    else:
        # Shrink the database once to the activities whose name contains
        # any requested string, so the per-technology filters run on a
        # much smaller list.
        subset = [
            act for act in database if any(term in act["name"] for term in terms)
        ]

    return {
        tech: set(_act_fltr(subset, spec.get("fltr"), spec.get("mask")))
        for tech, spec in filtr.items()
    }
def _get_mapping(filepath: Union[str, Path]) -> dict:
    """
    Load a YAML file and return its content.

    :param filepath: YAML file path
    :type filepath: Union[str, Path]
    :return: the parsed content of the file; an empty dictionary if the
        file contains no YAML document
    :rtype: dict
    :raises OSError: if the file cannot be opened.
    :raises yaml.YAMLError: if the file is not valid YAML.
    """
    with open(filepath, "r", encoding="utf-8") as stream:
        # NOTE(review): full_load resolves more YAML tags than safe_load.
        # If these files can come from untrusted sources, consider
        # switching to yaml.safe_load -- confirm the files use plain
        # mappings/lists/strings only before doing so.
        content = yaml.full_load(stream)

    # An empty file parses to None; return an empty mapping instead so
    # callers can iterate the result without a special case.
    return {} if content is None else content