Commit 963ee8d2 authored by ina's avatar ina
Browse files

code clean-up

parent 4bf6445b
city,ID,label
http://dbpedia.org/resource/Sao_Paulo,108,1
http://dbpedia.org/resource/Guangzhou,144,1
http://dbpedia.org/resource/Munich,12,2
http://dbpedia.org/resource/Dubai,97,1
http://dbpedia.org/resource/San_Francisco,16,2
http://dbpedia.org/resource/Shenyang,161,1
http://dbpedia.org/resource/San_Juan,73,1
http://dbpedia.org/resource/Osaka,53,2
http://dbpedia.org/resource/Bangalore,168,1
http://dbpedia.org/resource/Nassau,128,1
http://dbpedia.org/resource/Kuala_Lumpur,83,1
http://dbpedia.org/resource/Leipzig,76,1
http://dbpedia.org/resource/Tashkent,191,0
http://dbpedia.org/resource/Seattle,39,2
http://dbpedia.org/resource/Detroit,65,2
http://dbpedia.org/resource/Pointe_Noire,211,0
http://dbpedia.org/resource/San_Jose,96,1
http://dbpedia.org/resource/Ulsan,119,1
http://dbpedia.org/resource/New_York,50,2
http://dbpedia.org/resource/Birmingham,60,2
http://dbpedia.org/resource/Manaus,134,1
http://dbpedia.org/resource/Atlanta,34,2
http://dbpedia.org/resource/Nanjing,153,1
http://dbpedia.org/resource/Santa_Cruz,148,1
http://dbpedia.org/resource/Pittsburgh,46,2
http://dbpedia.org/resource/Bangkok,112,1
http://dbpedia.org/resource/Boston,48,2
http://dbpedia.org/resource/Omuta,68,1
http://dbpedia.org/resource/Damascus,165,1
http://dbpedia.org/resource/Bangui,208,0
http://dbpedia.org/resource/Libreville,146,1
http://dbpedia.org/resource/Luxembourg,22,2
http://dbpedia.org/resource/Jeddah,151,1
http://dbpedia.org/resource/Beijing,140,1
http://dbpedia.org/resource/Montreal,27,2
http://dbpedia.org/resource/Yangon,176,0
http://dbpedia.org/resource/Cotonou,178,0
http://dbpedia.org/resource/Lagos,197,0
http://dbpedia.org/resource/Panama_City,92,1
http://dbpedia.org/resource/Zagreb,126,1
city,ID,label
http://dbpedia.org/resource/Katsuyama,69,1
http://dbpedia.org/resource/Hanoi,170,1
http://dbpedia.org/resource/Quito,115,1
http://dbpedia.org/resource/Conakry,196,0
http://dbpedia.org/resource/Port_Louis,80,1
http://dbpedia.org/resource/Medellin,141,1
http://dbpedia.org/resource/Bogota,127,1
http://dbpedia.org/resource/Wellington,31,2
http://dbpedia.org/resource/Amsterdam,10,2
http://dbpedia.org/resource/Asuncion,110,1
http://dbpedia.org/resource/Dacca,184,0
http://dbpedia.org/resource/Melbourne,13,2
http://dbpedia.org/resource/Taipei,81,1
http://dbpedia.org/resource/Paris,24,2
http://dbpedia.org/resource/Jerusalem,79,1
http://dbpedia.org/resource/Barcelona,61,2
http://dbpedia.org/resource/Milan,67,2
http://dbpedia.org/resource/Jakarta,143,1
http://dbpedia.org/resource/Islamabad,149,1
http://dbpedia.org/resource/Brazzaville,212,0
http://dbpedia.org/resource/Guayaquil,123,1
http://dbpedia.org/resource/Accra,133,1
http://dbpedia.org/resource/Caracas,122,1
http://dbpedia.org/resource/Abidjan,147,1
http://dbpedia.org/resource/Hamburg,30,2
http://dbpedia.org/resource/Tripoli,180,0
http://dbpedia.org/resource/Kinshasa,205,0
http://dbpedia.org/resource/Geneva,6,2
http://dbpedia.org/resource/Toronto,20,2
http://dbpedia.org/resource/Tsukuba,58,2
http://dbpedia.org/resource/St._Petersburg,159,1
http://dbpedia.org/resource/Jilin,172,1
http://dbpedia.org/resource/St._Louis,56,2
http://dbpedia.org/resource/Tunis,99,1
http://dbpedia.org/resource/Tokyo,28,2
http://dbpedia.org/resource/Nagoya,54,2
http://dbpedia.org/resource/Nouakchott,200,0
http://dbpedia.org/resource/Istanbul,94,1
http://dbpedia.org/resource/Ljubljana,86,1
http://dbpedia.org/resource/Sofia,120,1
http://dbpedia.org/resource/Beirut,156,1
http://dbpedia.org/resource/Kuwait_City,130,1
http://dbpedia.org/resource/Baghdad,209,0
http://dbpedia.org/resource/Montevideo,72,1
http://dbpedia.org/resource/Djibouti,169,1
http://dbpedia.org/resource/Rayong,135,1
http://dbpedia.org/resource/Warsaw,88,1
http://dbpedia.org/resource/London,35,2
http://dbpedia.org/resource/Limassol,84,1
http://dbpedia.org/resource/Cape_Town,82,1
http://dbpedia.org/resource/Vilnius,90,1
http://dbpedia.org/resource/Lisbon,62,2
http://dbpedia.org/resource/Noumea,106,1
http://dbpedia.org/resource/Minsk,175,0
http://dbpedia.org/resource/Nairobi,142,1
http://dbpedia.org/resource/Ho_Chi_Minh_City,154,1
http://dbpedia.org/resource/Seoul,93,1
http://dbpedia.org/resource/Algiers,185,0
http://dbpedia.org/resource/Riyadh,150,1
http://dbpedia.org/resource/Bucharest,109,1
http://dbpedia.org/resource/Vientiane,155,1
http://dbpedia.org/resource/Rio_De_Janeiro,111,1
http://dbpedia.org/resource/Prague,77,1
http://dbpedia.org/resource/Hong_Kong,70,1
http://dbpedia.org/resource/Luanda,207,0
http://dbpedia.org/resource/Frankfurt,11,2
http://dbpedia.org/resource/Santo_Domingo,114,1
http://dbpedia.org/resource/Lusaka,145,1
http://dbpedia.org/resource/Tirana,174,0
http://dbpedia.org/resource/Mumbai,162,1
http://dbpedia.org/resource/Athens,85,1
http://dbpedia.org/resource/Guatemala_City,137,1
http://dbpedia.org/resource/Lima,113,1
http://dbpedia.org/resource/Ouagadougou,206,0
http://dbpedia.org/resource/Niamey,203,0
http://dbpedia.org/resource/Portland,52,2
http://dbpedia.org/resource/Baku,190,0
http://dbpedia.org/resource/Kazan,182,0
http://dbpedia.org/resource/Oslo,17,2
http://dbpedia.org/resource/Skopje,158,1
http://dbpedia.org/resource/Houston,57,2
http://dbpedia.org/resource/Calgary,36,2
http://dbpedia.org/resource/Minneapolis,51,2
http://dbpedia.org/resource/Lome,193,0
http://dbpedia.org/resource/Maputo,194,0
http://dbpedia.org/resource/Port_Au_Prince,183,0
http://dbpedia.org/resource/Adelaide,21,2
http://dbpedia.org/resource/Cleveland,59,2
http://dbpedia.org/resource/Riga,91,1
http://dbpedia.org/resource/Brasilia,103,1
http://dbpedia.org/resource/Auckland,7,2
http://dbpedia.org/resource/Ndjamena,198,0
http://dbpedia.org/resource/Dar_Es_Salaam,192,0
http://dbpedia.org/resource/Johor_Baharu,107,1
http://dbpedia.org/resource/Budapest,75,1
http://dbpedia.org/resource/Harare,116,1
http://dbpedia.org/resource/Shanghai,136,1
http://dbpedia.org/resource/Kiev,157,1
http://dbpedia.org/resource/Bamako,204,0
http://dbpedia.org/resource/Winston_Salem,41,2
http://dbpedia.org/resource/Blantyre,138,1
http://dbpedia.org/resource/Papeete,98,1
http://dbpedia.org/resource/Antananarivo,195,0
http://dbpedia.org/resource/Managua,167,1
http://dbpedia.org/resource/Havana,188,0
http://dbpedia.org/resource/Vancouver,1,2
http://dbpedia.org/resource/Yokohama,43,2
http://dbpedia.org/resource/Yokkaichi,66,2
http://dbpedia.org/resource/Nurnberg,25,2
http://dbpedia.org/resource/Moscow,160,1
http://dbpedia.org/resource/Johannesburg,89,1
http://dbpedia.org/resource/Buenos_Aires,74,1
http://dbpedia.org/resource/Lahore,163,1
http://dbpedia.org/resource/Abu_Dhabi,102,1
http://dbpedia.org/resource/Sydney,5,2
http://dbpedia.org/resource/Monterrey,100,1
http://dbpedia.org/resource/Copenhagen,8,2
http://dbpedia.org/resource/Dublin,40,2
http://dbpedia.org/resource/Almaty,187,0
http://dbpedia.org/resource/Mexico_City,131,1
http://dbpedia.org/resource/Tallinn,105,1
http://dbpedia.org/resource/Glasgow,55,2
http://dbpedia.org/resource/Cairo,121,1
http://dbpedia.org/resource/Sanaa,199,0
http://dbpedia.org/resource/Dakar,152,1
http://dbpedia.org/resource/Madrid,49,2
http://dbpedia.org/resource/Bern,4,2
http://dbpedia.org/resource/Stockholm,23,2
http://dbpedia.org/resource/Vienna,3,2
http://dbpedia.org/resource/Belgrade,202,0
http://dbpedia.org/resource/San_Salvador,171,1
http://dbpedia.org/resource/Ludwigshafen,32,2
http://dbpedia.org/resource/Port_Harcourt,201,0
http://dbpedia.org/resource/Perth,18,2
http://dbpedia.org/resource/Rome,71,1
http://dbpedia.org/resource/La_Paz,139,1
http://dbpedia.org/resource/Washington_Dc,45,2
http://dbpedia.org/resource/Kobe,37,2
http://dbpedia.org/resource/Chicago,47,2
http://dbpedia.org/resource/Victoria,104,1
http://dbpedia.org/resource/Tel_Aviv,78,1
http://dbpedia.org/resource/Helsinki,9,2
http://dbpedia.org/resource/Miami,64,2
http://dbpedia.org/resource/Kingston,132,1
http://dbpedia.org/resource/Manila,101,1
http://dbpedia.org/resource/Amman,129,1
http://dbpedia.org/resource/Manama,125,1
http://dbpedia.org/resource/Douala,186,0
http://dbpedia.org/resource/Karachi,164,1
http://dbpedia.org/resource/Port_Elizabeth,95,1
http://dbpedia.org/resource/Sarajevo,189,0
http://dbpedia.org/resource/Brussels,19,2
http://dbpedia.org/resource/Santiago,87,1
http://dbpedia.org/resource/Brisbane,26,2
http://dbpedia.org/resource/Lyon,33,2
http://dbpedia.org/resource/St._Peter_Port,63,2
http://dbpedia.org/resource/Lexington,38,2
http://dbpedia.org/resource/Khartoum,210,0
http://dbpedia.org/resource/Madras,173,1
http://dbpedia.org/resource/Novosibirsk,179,0
http://dbpedia.org/resource/Casablanca,124,1
http://dbpedia.org/resource/Berlin,29,2
http://dbpedia.org/resource/Yaounde,177,0
http://dbpedia.org/resource/New_Delhi,181,0
http://dbpedia.org/resource/Singapore,44,2
http://dbpedia.org/resource/Colombo,118,1
http://dbpedia.org/resource/Los_Angeles,42,2
http://dbpedia.org/resource/Dusseldorf,15,2
http://dbpedia.org/resource/Bratislava,117,1
http://dbpedia.org/resource/Honolulu,14,2
http://dbpedia.org/resource/Zurich,2,2
http://dbpedia.org/resource/Katsuyama,69,1
This diff is collapsed.
This diff is collapsed.
"""isort:skip_file"""
from .kg import KG, Vertex
from .sparql_loader import SPARQLLoader
from .rdf_loader import RDFLoader
# Public API of the package: exactly the names imported above, listed
# alphabetically. This is what `from <package> import *` exposes.
__all__ = [
"KG",
"RDFLoader",
"SPARQLLoader",
"Vertex",
]
import abc
from typing import List, Tuple
class Vertex(object):
    """A node of the knowledge graph: either an entity or a predicate.

    Entity vertices (``predicate=False``) are identified by ``name`` alone, so
    two entity vertices with the same name are interchangeable as dictionary
    keys and set members. Predicate vertices are identified by their unique
    ``id`` together with the vertices they connect and their name, so every
    predicate occurrence is a distinct vertex.

    Args:
        name: The name of the vertex.
        predicate: Whether the vertex represents a predicate.
        vprev: For a predicate, the vertex the edge starts from.
        vnext: For a predicate, the vertex the edge points to.
    """

    # Class-wide counter that gives every created vertex a distinct ``id``.
    vertex_counter = 0

    def __init__(self, name, predicate=False, vprev=None, vnext=None):
        self.name = name
        self.predicate = predicate
        self.vprev = vprev
        self.vnext = vnext
        self.id = Vertex.vertex_counter
        Vertex.vertex_counter += 1

    def _key(self):
        """Return the value that defines this vertex's identity."""
        if self.predicate:
            return (self.id, self.vprev, self.vnext, self.name)
        return self.name

    def __eq__(self, other):
        # Compare the identity keys themselves instead of their hashes: two
        # different keys may share a hash value, and calling ``__hash__`` on
        # an unhashable operand (e.g. a list) would raise ``TypeError``.
        if isinstance(other, Vertex):
            return (
                bool(self.predicate) == bool(other.predicate)
                and self._key() == other._key()
            )
        if isinstance(other, str):
            # An entity vertex hashes like its name, so a plain name can be
            # used to look a vertex up in a dict or set; keep that working.
            return not self.predicate and self.name == other
        return NotImplemented

    def __hash__(self):
        return hash(self._key())

    def __lt__(self, other):
        return self.name < other.name

    def __repr__(self):
        return "Vertex(name={!r}, predicate={!r}, id={!r})".format(
            self.name, self.predicate, self.id
        )
class KG(metaclass=abc.ABCMeta):
    """Abstract interface shared by every Knowledge Graph backend.

    A concrete backend only has to provide :meth:`get_hops`.
    """

    def __init__(self):
        """Nothing to initialise at this level."""

    @abc.abstractmethod
    def get_hops(self, vertex: str) -> List[Tuple[str, str]]:
        """Return the outgoing hops (vertex -> predicate -> object).

        Args:
            vertex: Name of the vertex whose hops are requested.

        Returns:
            The hops of the vertex as (predicate, object) pairs.

        Raises:
            NotImplementedError: Always, when this base implementation is
                invoked directly.
        """
        raise NotImplementedError("This has to be implemented")
from collections import defaultdict
from typing import List, Set, Tuple
import matplotlib.pyplot as plt
import networkx as nx
import rdflib
from graphs import KG, Vertex
class RDFLoader(KG):
    """Represents a Knowledge Graph loaded in memory with RDFLib.

    Every triple (s, p, o) whose predicate is not listed in
    ``label_predicates`` becomes three vertices and two directed edges,
    ``s -> p`` and ``p -> o``; the predicate of each triple gets its own
    vertex. The forward edges are kept in ``_transition_matrix`` and the
    reversed ones in ``_inv_transition_matrix``.

    Args:
        file_name: Location of the file to parse.
        label_predicates: Predicates whose triples are left out of the graph.
        file_type: Serialisation format handed to rdflib. When ``None`` the
            extension of ``file_name`` is used instead.
    """

    def __init__(self, file_name, label_predicates, file_type=None):
        self.file_name = file_name
        self.file_type = file_type
        self.label_predicates = label_predicates
        self._inv_transition_matrix = defaultdict(set)
        self._transition_matrix = defaultdict(set)
        self._vertices = set()
        self._entities = set()
        self._read_file()

    def _read_file(self) -> None:
        """Parses the file with rdflib and fills the graph structures."""
        kg = rdflib.Graph()
        try:
            if self.file_type is None:
                kg.parse(self.file_name, format=self.file_name.split(".")[-1])
            else:
                # ``format`` has to be given by keyword: the second positional
                # parameter of ``Graph.parse`` is ``publicID``, so passing the
                # file type positionally never selected the parser.
                kg.parse(self.file_name, format=self.file_type)
        except Exception:
            # Deliberate best effort: if the requested format cannot be used,
            # let rdflib pick the format on its own.
            kg.parse(self.file_name)

        for (s, p, o) in kg:
            if p in self.label_predicates:
                continue
            s_v = Vertex(str(s))
            o_v = Vertex(str(o))
            p_v = Vertex(str(p), predicate=True, vprev=s_v, vnext=o_v)
            self.add_vertex(s_v)
            self.add_vertex(p_v)
            self.add_vertex(o_v)
            self.add_edge(s_v, p_v)
            self.add_edge(p_v, o_v)

    def add_vertex(self, vertex: Vertex) -> None:
        """Adds a vertex to the Knowledge Graph.

        Args:
            vertex: The vertex. Non-predicate vertices are additionally
                recorded as entities.
        """
        self._vertices.add(vertex)
        if not vertex.predicate:
            self._entities.add(vertex)

    def add_edge(self, v1: Vertex, v2: Vertex) -> None:
        """Adds a uni-directional edge (v1 -> v2).

        Args:
            v1: The first vertex.
            v2: The second vertex.
        """
        self._transition_matrix[v1].add(v2)
        self._inv_transition_matrix[v2].add(v1)

    def get_hops(self, vertex: str) -> List[Tuple[str, str]]:
        """Returns the hops (vertex -> predicate -> object) of a vertex.

        Args:
            vertex: The name of the vertex to get the hops.

        Returns:
            The hops of a vertex in a (predicate, object) form. Both members
            of each pair are the ``Vertex`` objects stored in the graph.
        """
        if isinstance(vertex, str):
            vertex = Vertex(vertex)  # type: ignore
        hops = []
        predicates = self._transition_matrix[vertex]
        for pred in predicates:
            # Each predicate vertex is created for one triple, so it is
            # expected to lead to exactly one object.
            assert len(self._transition_matrix[pred]) == 1
            for obj in self._transition_matrix[pred]:
                hops.append((pred, obj))
        return hops

    def get_inv_neighbors(self, vertex: Vertex) -> Set[Vertex]:
        """Gets the reverse neighbors of a vertex.

        Args:
            vertex: The vertex, or its name.

        Returns:
            The vertices that have an edge pointing to ``vertex``.
        """
        if isinstance(vertex, str):
            vertex = Vertex(vertex)
        return self._inv_transition_matrix[vertex]

    def get_neighbors(self, vertex: Vertex) -> Set[Vertex]:
        """Gets the neighbors of a vertex.

        Args:
            vertex: The vertex, or its name.

        Returns:
            The vertices that ``vertex`` has an edge to.
        """
        if isinstance(vertex, str):
            vertex = Vertex(vertex)
        return self._transition_matrix[vertex]

    def remove_edge(self, v1: str, v2: str):
        """Removes the edge (v1 -> v2) if present.

        The reverse index is updated as well, so ``get_inv_neighbors`` no
        longer reports an edge that has been removed.

        Args:
            v1: The first vertex, or its name.
            v2: The second vertex, or its name.
        """
        # Accept names like the neighbour accessors of this class do.
        if isinstance(v1, str):
            v1 = Vertex(v1)
        if isinstance(v2, str):
            v2 = Vertex(v2)
        # ``get`` avoids creating an empty entry for an unknown ``v1``.
        successors = self._transition_matrix.get(v1)
        if successors is not None and v2 in successors:
            successors.remove(v2)
            predecessors = self._inv_transition_matrix.get(v2)
            if predecessors is not None:
                predecessors.discard(v1)

    def visualise(self) -> None:
        """Visualises the Knowledge Graph.

        Entities become nodes labelled with the last ``/``-separated segment
        of their name; each predicate becomes a labelled edge between the
        entities it connects.
        """
        nx_graph = nx.DiGraph()

        for v in self._vertices:
            if not v.predicate:
                name = v.name.split("/")[-1]
                nx_graph.add_node(name, name=name, pred=v.predicate)

        for v in self._vertices:
            if not v.predicate:
                v_name = v.name.split("/")[-1]
                # Neighbors are predicates
                for pred in self.get_neighbors(v):
                    pred_name = pred.name.split("/")[-1]
                    for obj in self.get_neighbors(pred):
                        obj_name = obj.name.split("/")[-1]
                        nx_graph.add_edge(v_name, obj_name, name=pred_name)

        plt.figure(figsize=(10, 10))
        _pos = nx.circular_layout(nx_graph)
        nx.draw_networkx_nodes(nx_graph, pos=_pos)
        nx.draw_networkx_edges(nx_graph, pos=_pos)
        nx.draw_networkx_labels(nx_graph, pos=_pos)
        names = nx.get_edge_attributes(nx_graph, "name")
        nx.draw_networkx_edge_labels(nx_graph, pos=_pos, edge_labels=names)
from typing import List, Tuple
from SPARQLWrapper import JSON, SPARQLWrapper
from graphs import KG
class SPARQLLoader(KG):
    """Represents a Knowledge Graph from a SPARQL endpoint.

    Args:
        location: URL of the SPARQL endpoint to query.
    """

    # Characters that may not occur between ``<`` and ``>`` in a SPARQL
    # IRI reference (control characters and the space are checked separately).
    _IRI_FORBIDDEN = frozenset('<>"{}|^`\\')

    def __init__(self, location):
        self.location = location
        self.endpoint = SPARQLWrapper(self.location)

    @classmethod
    def _is_queryable(cls, vertex: str) -> bool:
        """Tells whether ``vertex`` can safely be written as ``<vertex>``."""
        if not vertex.startswith("http://"):
            return False
        # A value such as a literal that merely starts with "http://" but
        # contains a space, a quote or '>' would yield a malformed (or
        # altered) query once spliced between the angle brackets.
        return not any(
            ch <= " " or ch in cls._IRI_FORBIDDEN for ch in vertex
        )

    def _select_pairs(self, query: str, first: str, second: str) -> List[Tuple[str, str]]:
        """Runs a SELECT query and returns the two named variables per row."""
        self.endpoint.setQuery(query)
        self.endpoint.setReturnFormat(JSON)
        results = self.endpoint.query().convert()
        return [
            (row[first]["value"], row[second]["value"])
            for row in results["results"]["bindings"]
        ]

    def get_hops(self, vertex: str) -> List[Tuple[str, str]]:
        """Returns the hops (vertex -> predicate -> object) of a vertex.

        Args:
            vertex: The name of the vertex to get the hops.

        Returns:
            The hops of a vertex in a (predicate, object) form. The list is
            empty when ``vertex`` is not an ``http://`` IRI that can be
            embedded in a query.
        """
        if not self._is_queryable(vertex):
            return []
        query = "SELECT ?p ?o WHERE { <" + vertex + "> ?p ?o . }"
        return self._select_pairs(query, "p", "o")

    def get_sub_pred(self, vertex: str) -> List[Tuple[str, str]]:
        """Returns the incoming hops (subject -> predicate -> vertex).

        Args:
            vertex: The name of the vertex to get the hops.

        Returns:
            The hops of a vertex in a (subject, predicate) form. The list is
            empty when ``vertex`` is not an ``http://`` IRI that can be
            embedded in a query.
        """
        if not self._is_queryable(vertex):
            return []
        query = "SELECT ?s ?p WHERE { ?s ?p <" + vertex + "> . }"
        return self._select_pairs(query, "s", "p")
import random
import os
import numpy as np
# NOTE(review): PYTHONHASHSEED is consulted when an interpreter starts, so
# this assignment presumably only affects child processes spawned later, not
# the hash randomisation of the process already running -- confirm whether
# the variable should be exported before launching the script instead.
os.environ['PYTHONHASHSEED'] = '42'
# Fix the seeds of the stdlib and NumPy global random generators.
random.seed(42)
np.random.seed(42)
import argparse
import rdflib
import node2vec
import pandas as pd
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import networkx as nx
from graph import *
from rdf2vec import RDF2VecTransformer
from graphs import SPARQLLoader
import numpy as np
from collections import defaultdict
import warnings
# Suppress every warning for the remainder of the run (no category or module
# restriction is given to the filter).
warnings.filterwarnings('ignore')
def parse_args(argv=None):
    '''
    Parses the node2vec arguments.

    Args:
        argv: Optional list of argument strings. When None (the default)
            the arguments are taken from the command line.

    Returns:
        The argparse namespace holding the parsed options.
    '''
    parser = argparse.ArgumentParser(description="Run ontowalk2vec.")

    parser.add_argument('--input', nargs='?', default='',
                        help='Input graph path')

    parser.add_argument('--output', nargs='?', default='emb/data.emb',
                        help='Embeddings path')

    parser.add_argument('--dimensions', type=int, default=128,
                        help='Number of dimensions. Default is 128.')

    parser.add_argument('--walk-length', type=int, default=80,
                        help='Length of walk per source. Default is 80.')

    parser.add_argument('--num-walks', type=int, default=10,
                        help='Number of walks per source. Default is 10.')

    parser.add_argument('--window-size', type=int, default=10,
                        help='Context size for optimization. Default is 10.')

    parser.add_argument('--iter', default=1, type=int,
                        help='Number of epochs in SGD')

    parser.add_argument('--workers', type=int, default=8,
                        help='Number of parallel workers. Default is 8.')

    parser.add_argument('--p', type=float, default=1,
                        help='Return hyperparameter. Default is 1.')

    parser.add_argument('--q', type=float, default=1,
                        help='Inout hyperparameter. Default is 1.')

    # Each pair of flags writes to ONE destination, so the negative flag
    # really switches the option off instead of setting an unrelated
    # attribute that nothing reads.
    parser.add_argument('--weighted', dest='weighted', action='store_true',
                        help='Boolean specifying (un)weighted. Default is unweighted.')
    parser.add_argument('--unweighted', dest='weighted', action='store_false')
    parser.set_defaults(weighted=False)

    parser.add_argument('--directed', dest='directed', action='store_true',
                        help='Graph is (un)directed. Default is directed.')
    parser.add_argument('--undirected', dest='directed', action='store_false')
    parser.set_defaults(directed=True)

    return parser.parse_args(argv)
def main(args):
# Load the data with rdflib
print(end='Loading data... ', flush=True)
g = rdflib.Graph()
g.parse('../ontowalk2vec/data/dbpedia_2016-10.owl')
# Load our train & test instances and labels
test_data = pd.read_csv('../ontowalk2vec/data/cities_test.csv', sep=',')
train_data = pd.read_csv('../ontowalk2vec/data/cities_train.csv', sep=',')
train_points = [rdflib.URIRef(x) for x in train_data['city']]
train_labels = train_data['label']
test_points = [rdflib.URIRef(x) for x in test_data['city']]
test_labels = test_data['label']
all_labels = list(train_labels) + list(test_labels)
# Define the label predicates, all triples with these predicates
# will be excluded from the graph
label_predicates = [
#rdflib.term.URIRef('http://dl-learner.org/carcinogenesis#isMutagenic')
]
# Convert the rdflib to our KnowledgeGraph object
kg = SPARQLLoader("https://dbpedia.org/sparql")
kgraph = KnowledgeGraph()
for root in (train_points+test_points):
walks = {(root,)}