Skip to content

You can check the documentation of the GraphNodesLoader class here.

Module to load data into a Neo4j graph database for different node types.

This module provides the GraphNodesLoader class, which loads data for specific node types into a Neo4j database. Users provide the connection details and the node label of the data to load.

Classes:

Name Description
GraphNodesLoader

Class to load data into a Neo4j graph database for different node types.

Functions:

Name Description
main

Main function to parse command-line arguments and load data for the specified node type.

GraphNodesLoader

Class to load data into a Neo4j graph database for different node types.

Attributes:

Name Type Description
driver

The Neo4j driver instance.

node_data_adder

An instance of the AddGraphNodes class.

label_mapping

A dictionary mapping node labels to their unique properties and file paths.

Source code in chemgraphbuilder/graph_nodes_loader.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
class GraphNodesLoader:
    """
    Class to load data into a Neo4j graph database for different node types.

    The loader creates its own Neo4j driver in ``__init__``. Call ``close()``
    when finished, or use the instance as a context manager, so the driver is
    closed.

    Attributes:
        driver: The Neo4j driver instance.
        logger: The logger named after this module.
        node_data_adder: An instance of the AddGraphNodes class.
        label_mapping: A dictionary mapping node labels to their unique properties and file paths.
    """

    def __init__(self, uri, username, password):
        """
        Initializes the GraphNodesLoader with Neo4j connection details.

        Args:
            uri (str): The URI of the Neo4j database.
            username (str): The username for the Neo4j database.
            password (str): The password for the Neo4j database.
        """
        self.driver = GraphDatabase.driver(uri, auth=(username, password))
        self.logger = logging.getLogger(__name__)  # Define the logger
        self.logger.info("GraphNodesLoader class initialized.")
        self.node_data_adder = AddGraphNodes(self.driver)
        # Each supported label maps to the property used for its uniqueness
        # constraint and the CSV file its nodes are read from.
        self.label_mapping = {
            "Compound": {
                "unique_property": "CompoundID",
                "file_path": "Data/Nodes/Compound_Properties_Processed.csv"
            },
            "BioAssay": {
                "unique_property": "AssayID",
                "file_path": "Data/Nodes/Assay_Properties_Processed.csv"
            },
            "Gene": {
                "unique_property": "GeneID",
                "file_path": "Data/Nodes/Gene_Properties_Processed.csv"
            },
            "Protein": {
                "unique_property": "ProteinRefSeqAccession",
                "file_path": "Data/Nodes/Protein_Properties_Processed.csv"
            }
        }

    def close(self):
        """
        Closes the Neo4j driver held by this loader.
        """
        self.driver.close()

    def __enter__(self):
        """
        Returns the loader itself so it can be used in a ``with`` statement.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Closes the driver when the ``with`` block ends.

        Returns None, so an exception raised inside the block still propagates.
        """
        self.close()

    def create_uniqueness_constraint(self, label, unique_property):
        """
        Creates a uniqueness constraint for a given node label and property.

        Delegates to ``node_data_adder.create_uniqueness_constraint``, passing
        this loader's driver.

        Args:
            label (str): The label of the node.
            unique_property (str): The property to enforce uniqueness on.
        """
        self.node_data_adder.create_uniqueness_constraint(
            self.driver, label=label, unique_property=unique_property
        )

    def process_and_add_nodes(self, file_path, label, unique_property):
        """
        Processes and adds nodes from a CSV file to the Neo4j database.

        Delegates to ``node_data_adder.process_and_add_nodes``.

        Args:
            file_path (str): The path to the CSV file containing node data.
            label (str): The label of the node.
            unique_property (str): The unique property of the node.
        """
        self.node_data_adder.process_and_add_nodes(
            file_path, label=label, unique_property=unique_property
        )

    def load_data_for_node_type(self, label):
        """
        Loads data for a specific node type into the Neo4j database.

        If ``label`` has no entry in ``label_mapping``, an error is logged and
        nothing is loaded. Otherwise the uniqueness constraint is created
        first and the nodes are added afterwards.

        Args:
            label (str): The label of the node.
        """
        if label not in self.label_mapping:
            self.logger.error("No mapping found for label: %s", label)
            return

        unique_property = self.label_mapping[label]["unique_property"]
        file_path = self.label_mapping[label]["file_path"]

        self.create_uniqueness_constraint(label, unique_property)
        self.process_and_add_nodes(file_path, label, unique_property)

__init__(uri, username, password)

Initializes the GraphNodesLoader with Neo4j connection details.

Parameters:

Name Type Description Default
uri str

The URI of the Neo4j database.

required
username str

The username for the Neo4j database.

required
password str

The password for the Neo4j database.

required
Source code in chemgraphbuilder/graph_nodes_loader.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def __init__(self, uri, username, password):
    """
    Sets up a GraphNodesLoader from Neo4j connection details.

    Creates the driver, the module logger and the AddGraphNodes helper, then
    builds the label-to-source mapping used when loading nodes.

    Args:
        uri (str): The URI of the Neo4j database.
        username (str): The username for the Neo4j database.
        password (str): The password for the Neo4j database.
    """
    credentials = (username, password)
    self.driver = GraphDatabase.driver(uri, auth=credentials)
    self.logger = logging.getLogger(__name__)
    self.logger.info("GraphNodesLoader class initialized.")
    self.node_data_adder = AddGraphNodes(self.driver)

    # (node label, unique property, CSV file holding the node data)
    node_sources = (
        ("Compound", "CompoundID",
         "Data/Nodes/Compound_Properties_Processed.csv"),
        ("BioAssay", "AssayID",
         "Data/Nodes/Assay_Properties_Processed.csv"),
        ("Gene", "GeneID",
         "Data/Nodes/Gene_Properties_Processed.csv"),
        ("Protein", "ProteinRefSeqAccession",
         "Data/Nodes/Protein_Properties_Processed.csv"),
    )
    self.label_mapping = {
        node_label: {"unique_property": key_property, "file_path": csv_path}
        for node_label, key_property, csv_path in node_sources
    }

create_uniqueness_constraint(label, unique_property)

Creates a uniqueness constraint for a given node label and property.

Parameters:

Name Type Description Default
label str

The label of the node.

required
unique_property str

The property to enforce uniqueness on.

required
Source code in chemgraphbuilder/graph_nodes_loader.py
65
66
67
68
69
70
71
72
73
74
75
def create_uniqueness_constraint(self, label, unique_property):
    """
    Adds a uniqueness constraint on one property of a node label.

    The work is delegated to the node data adder, which receives this
    loader's driver as its first argument.

    Args:
        label (str): The label of the node.
        unique_property (str): The property to enforce uniqueness on.
    """
    adder = self.node_data_adder
    constraint_options = {"label": label, "unique_property": unique_property}
    adder.create_uniqueness_constraint(self.driver, **constraint_options)

load_data_for_node_type(label)

Loads data for a specific node type into the Neo4j database.

Parameters:

Name Type Description Default
label str

The label of the node.

required
Source code in chemgraphbuilder/graph_nodes_loader.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def load_data_for_node_type(self, label):
    """
    Loads the nodes of one type into the Neo4j database.

    A label without an entry in ``label_mapping`` is reported through the
    logger and nothing is loaded. For a known label the uniqueness
    constraint is created before the nodes are added.

    Args:
        label (str): The label of the node.
    """
    known_labels = self.label_mapping
    if label not in known_labels:
        self.logger.error("No mapping found for label: %s", label)
        return

    source = known_labels[label]
    key_property, csv_path = source["unique_property"], source["file_path"]

    self.create_uniqueness_constraint(label, key_property)
    self.process_and_add_nodes(csv_path, label, key_property)

process_and_add_nodes(file_path, label, unique_property)

Processes and adds nodes from a CSV file to the Neo4j database.

Parameters:

Name Type Description Default
file_path str

The path to the CSV file containing node data.

required
label str

The label of the node.

required
unique_property str

The unique property of the node.

required
Source code in chemgraphbuilder/graph_nodes_loader.py
77
78
79
80
81
82
83
84
85
86
87
88
def process_and_add_nodes(self, file_path, label, unique_property):
    """
    Reads node data from a CSV file and adds it to the Neo4j database.

    The work is delegated to the node data adder.

    Args:
        file_path (str): The path to the CSV file containing node data.
        label (str): The label of the node.
        unique_property (str): The unique property of the node.
    """
    adder = self.node_data_adder
    node_options = {"label": label, "unique_property": unique_property}
    adder.process_and_add_nodes(file_path, **node_options)

main()

Main function to parse command-line arguments and load data for the specified node type.

Source code in chemgraphbuilder/graph_nodes_loader.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def main():
    """
    Main function to parse command-line arguments and load data for the specified node type.

    Reads ``--uri``, ``--username``, ``--password`` and ``--label`` (all
    required) from the command line, creates a GraphNodesLoader and loads the
    data for the requested label. The loader's driver is closed before
    returning, even if loading raises.
    """
    # Adjacent string literals are joined verbatim, so the separating space
    # must be part of one of the pieces.
    parser = argparse.ArgumentParser(description="Load data into "
                                                 "Neo4j graph database.")
    parser.add_argument('--uri', required=True,
                        help='URI for the Neo4j database')
    parser.add_argument('--username', required=True,
                        help='Username for the Neo4j database')
    parser.add_argument('--password', required=True,
                        help='Password for the Neo4j database')
    parser.add_argument('--label', required=True,
                        help='Label of the node')

    args = parser.parse_args()

    # Create an instance of GraphNodesLoader and load data for the specified node type
    graph_nodes_loader = GraphNodesLoader(args.uri, args.username, args.password)
    try:
        graph_nodes_loader.load_data_for_node_type(args.label)
    finally:
        # Close the driver created by the loader once loading is over.
        graph_nodes_loader.driver.close()