diff --git a/.gitignore b/.gitignore
index 723ef36f4e4f32c4560383aa5987c575a30c6535..1a192ea1a341a08aae7411cf09df2d2bb0b63dc4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-.idea
\ No newline at end of file
+.idea
+data/
+data-collection/out/
\ No newline at end of file
diff --git a/data-collection/BadgesHandler.java b/data-collection/BadgesHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..0074ce4010d6c058b2e63e51968bb8ab9fb6e9b4
--- /dev/null
+++ b/data-collection/BadgesHandler.java
@@ -0,0 +1,101 @@
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.IOException;
+
+/**
+ * Immutable holder for the attributes of one {@code <row>} element of the badges dump.
+ * Values are stored exactly as passed in (nulls included); nothing is parsed or validated.
+ */
+class BadgeRow {
+    private final String id;
+    private final String userId;
+    private final String name;
+    private final String date;
+    private final String badgeClass;
+    private final String tagBased;
+
+    public BadgeRow(String id, String userId, String name, String date, String badgeClass, String tagBased) {
+        this.id = id;
+        this.userId = userId;
+        this.name = name;
+        this.date = date;
+        this.badgeClass = badgeClass;
+        this.tagBased = tagBased;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public String getUserId() {
+        return userId;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public String getDate() {
+        return date;
+    }
+
+    public String getBadgeClass() {
+        return badgeClass;
+    }
+
+    public String getTagBased() {
+        return tagBased;
+    }
+
+    /** Renders all six fields; the labels keep their capitalised names, a null prints as "null". */
+    @Override
+    public String toString() {
+        return String.format(
+                "BadgeRow{Id='%s', UserId='%s', Name='%s', Date='%s', BadgeClass='%s', TagBased='%s'}",
+                id, userId, name, date, badgeClass, tagBased);
+    }
+}
+
+/** SAX handler for the badges dump: maps every {@code <row>} element to a {@link BadgeRow}. */
+public class BadgesHandler extends DefaultHandler {
+    private static final String ROW = "row";
+    private static final String BADGES = "badges";
+
+    /**
+     * Builds a {@link BadgeRow} from the attributes of a {@code row} element and accepts the
+     * {@code badges} root element silently.
+     *
+     * @throws SAXException if any other element name is encountered
+     */
+    @Override
+    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
+        if (ROW.equals(qName)) {
+            // TODO: the row is built but not consumed yet; nothing is printed or stored for badges.
+            BadgeRow badgeRow = new BadgeRow(attr.getValue("Id"), attr.getValue("UserId"), attr.getValue("Name"),
+                    attr.getValue("Date"), attr.getValue("Class"), attr.getValue("TagBased"));
+        } else if (!BADGES.equals(qName)) {
+            throw new SAXException(String.format("Unknown tag %s", qName));
+        }
+    }
+
+    /**
+     * Parses the badges dump and reports the elapsed time.
+     *
+     * @param args optional: {@code args[0]} overrides the default location of {@code badges.xml}
+     */
+    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
+        final long start = System.currentTimeMillis();
+        // parse(String, ...) treats its argument as a URI, so a backslash-separated path string is
+        // not portable; build the path from segments and hand the parser a File instead.
+        java.io.File input = args.length > 0 ? new java.io.File(args[0]) : java.nio.file.Paths.get(
+                "..", "data", "raw", "stackoverflow.com-Badges", "badges.xml").toFile();
+        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
+        saxParser.parse(input, new BadgesHandler());
+        System.out.println("SAX parse took " + (System.currentTimeMillis() - start) + "ms.");
+    }
+}
diff --git a/data-collection/CommentsHandler.java b/data-collection/CommentsHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..6c038ee4cd6f2ce9f6526e6a452d4a7f6e860782
--- /dev/null
+++ b/data-collection/CommentsHandler.java
@@ -0,0 +1,112 @@
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Immutable holder for the attributes of one {@code <row>} element of the comments dump.
+ * Values are stored exactly as passed in (nulls included); nothing is parsed or validated.
+ */
+class CommentRow {
+    private final String id;
+    private final String postId;
+    private final String score;
+    private final String text;
+    private final String creationDate;
+    private final String userId;
+    private final String contentLicense;
+
+    public CommentRow(String id, String postId, String score, String text,
+                      String creationDate, String userId, String contentLicense) {
+        this.id = id;
+        this.postId = postId;
+        this.score = score;
+        this.text = text;
+        this.creationDate = creationDate;
+        this.userId = userId;
+        this.contentLicense = contentLicense;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public String getPostId() {
+        return postId;
+    }
+
+    public String getScore() {
+        return score;
+    }
+
+    public String getText() {
+        return text;
+    }
+
+    public String getCreationDate() {
+        return creationDate;
+    }
+
+    public String getUserId() {
+        return userId;
+    }
+
+    public String getContentLicense() {
+        return contentLicense;
+    }
+
+    /** Renders all seven fields in declaration order; a null field prints as "null". */
+    @Override
+    public String toString() {
+        return String.format(
+                "CommentRow{id='%s', postId='%s', score='%s', text='%s', "
+                        + "creationDate='%s', userId='%s', contentLicense='%s'}",
+                id, postId, score, text, creationDate, userId, contentLicense);
+    }
+}
+/** SAX handler for the comments dump: prints one {@link CommentRow} per {@code <row>} element. */
+public class CommentsHandler extends DefaultHandler {
+    private static final String ROW = "row";
+    private static final String COMMENTS = "comments";
+
+    /**
+     * Prints a {@link CommentRow} built from the attributes of a {@code row} element and accepts
+     * the {@code comments} root element silently.
+     *
+     * @throws SAXException if any other element name is encountered
+     */
+    @Override
+    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
+        if (ROW.equals(qName)) {
+            CommentRow row = new CommentRow(
+                    attr.getValue("Id"), attr.getValue("PostId"), attr.getValue("Score"),
+                    attr.getValue("Text"), attr.getValue("CreationDate"), attr.getValue("UserId"),
+                    attr.getValue("ContentLicense"));
+            System.out.println(row);
+        } else if (!COMMENTS.equals(qName)) {
+            throw new SAXException(String.format("Unknown tag %s", qName));
+        }
+    }
+
+    /**
+     * Parses the comments dump and reports the elapsed time.
+     *
+     * @param args optional: {@code args[0]} overrides the default location of {@code comments.xml}
+     */
+    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
+        final long start = System.currentTimeMillis();
+        // parse(String, ...) treats its argument as a URI, so a backslash-separated path string is
+        // not portable; build the path from segments and hand the parser a File instead.
+        java.io.File input = args.length > 0 ? new java.io.File(args[0]) : java.nio.file.Paths.get(
+                "..", "data", "raw", "stackoverflow.com-Comments", "comments.xml").toFile();
+        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
+        saxParser.parse(input, new CommentsHandler());
+        System.out.println("SAX parse took " + (System.currentTimeMillis() - start) + "ms.");
+    }
+}
diff --git a/data-collection/PostsHandler.java b/data-collection/PostsHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..47b7063534e7f845771c02577b0030d58d03007f
--- /dev/null
+++ b/data-collection/PostsHandler.java
@@ -0,0 +1,206 @@
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.IOException;
+
+/**
+ * Immutable holder for the attributes of one {@code <row>} element of the posts dump.
+ * Values are stored exactly as passed in (nulls included); nothing is parsed or validated.
+ */
+class PostRow {
+    private final String id;
+    private final String postTypeId;
+    private final String acceptedAnswerId;
+    private final String creationDate;
+    private final String score;
+    private final String viewCount;
+    private final String body;
+    private final String ownerUserId;
+    private final String lastEditorUserId;
+    private final String lastEditorDisplayName;
+    private final String lastEditDate;
+    private final String title;
+    private final String tags;
+    private final String answerCount;
+    private final String commentCount;
+    private final String favoriteCount;
+    private final String communityOwnedDate;
+    private final String contentLicense;
+
+    public PostRow(String id, String postTypeId, String acceptedAnswerId, String creationDate,
+                   String score, String viewCount, String body, String ownerUserId,
+                   String lastEditorUserId, String lastEditorDisplayName, String lastEditDate,
+                   String title, String tags, String answerCount, String commentCount,
+                   String favoriteCount, String communityOwnedDate, String contentLicense) {
+        this.id = id;
+        this.postTypeId = postTypeId;
+        this.acceptedAnswerId = acceptedAnswerId;
+        this.creationDate = creationDate;
+        this.score = score;
+        this.viewCount = viewCount;
+        this.body = body;
+        this.ownerUserId = ownerUserId;
+        this.lastEditorUserId = lastEditorUserId;
+        this.lastEditorDisplayName = lastEditorDisplayName;
+        this.lastEditDate = lastEditDate;
+        this.title = title;
+        this.tags = tags;
+        this.answerCount = answerCount;
+        this.commentCount = commentCount;
+        this.favoriteCount = favoriteCount;
+        this.communityOwnedDate = communityOwnedDate;
+        this.contentLicense = contentLicense;
+    }
+
+    @Override
+    public String toString() {
+        return "PostRow{Id='" + id
+                + "', PostTypeId='" + postTypeId
+                + "', AcceptedAnswerId='" + acceptedAnswerId
+                + "', CreationDate='" + creationDate
+                + "', Score='" + score
+                + "', ViewCount='" + viewCount
+                + "', Body='" + body
+                + "', OwnerUserId='" + ownerUserId
+                + "', LastEditorUserId='" + lastEditorUserId
+                + "', LastEditorDisplayName='" + lastEditorDisplayName
+                + "', LastEditDate='" + lastEditDate
+                + "', Title='" + title
+                + "', Tags='" + tags
+                + "', AnswerCount='" + answerCount
+                + "', CommentCount='" + commentCount
+                + "', FavoriteCount='" + favoriteCount
+                + "', CommunityOwnedDate='" + communityOwnedDate
+                + "', ContentLicense='" + contentLicense + "'}";
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public String getPostTypeId() {
+        return postTypeId;
+    }
+
+    public String getAcceptedAnswerId() {
+        return acceptedAnswerId;
+    }
+
+    public String getCreationDate() {
+        return creationDate;
+    }
+
+    public String getScore() {
+        return score;
+    }
+
+    public String getViewCount() {
+        return viewCount;
+    }
+
+    public String getBody() {
+        return body;
+    }
+
+    public String getOwnerUserId() {
+        return ownerUserId;
+    }
+
+    public String getLastEditorUserId() {
+        return lastEditorUserId;
+    }
+
+    public String getLastEditorDisplayName() {
+        return lastEditorDisplayName;
+    }
+
+    public String getLastEditDate() {
+        return lastEditDate;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public String getTags() {
+        return tags;
+    }
+
+    public String getAnswerCount() {
+        return answerCount;
+    }
+
+    public String getCommentCount() {
+        return commentCount;
+    }
+
+    public String getFavoriteCount() {
+        return favoriteCount;
+    }
+
+    public String getCommunityOwnedDate() {
+        return communityOwnedDate;
+    }
+
+    public String getContentLicense() {
+        return contentLicense;
+    }
+}
+
+/** SAX handler for the posts dump: prints one {@link PostRow} per {@code <row>} element. */
+public class PostsHandler extends DefaultHandler {
+    private static final String ROW = "row";
+    private static final String POSTS = "posts";
+
+    /**
+     * Prints a {@link PostRow} built from the attributes of a {@code row} element and accepts
+     * the {@code posts} root element silently.
+     *
+     * @throws SAXException if any other element name is encountered
+     */
+    @Override
+    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
+        if (ROW.equals(qName)) {
+            PostRow row = new PostRow(
+                    attr.getValue("Id"), attr.getValue("PostTypeId"),
+                    attr.getValue("AcceptedAnswerId"), attr.getValue("CreationDate"),
+                    attr.getValue("Score"), attr.getValue("ViewCount"),
+                    attr.getValue("Body"), attr.getValue("OwnerUserId"),
+                    attr.getValue("LastEditorUserId"), attr.getValue("LastEditorDisplayName"),
+                    attr.getValue("LastEditDate"), attr.getValue("Title"),
+                    attr.getValue("Tags"), attr.getValue("AnswerCount"),
+                    attr.getValue("CommentCount"), attr.getValue("FavoriteCount"),
+                    attr.getValue("CommunityOwnedDate"), attr.getValue("ContentLicense"));
+            System.out.println(row);
+        } else if (!POSTS.equals(qName)) {
+            throw new SAXException(String.format("Unknown tag %s", qName));
+        }
+    }
+
+    /**
+     * Parses the posts dump and reports the elapsed time.
+     *
+     * @param args optional: {@code args[0]} overrides the default location of {@code posts.xml}
+     */
+    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
+        final long start = System.currentTimeMillis();
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        // NOTE(review): secure processing is switched off on purpose and kept that way here.
+        // Presumably this lifts the JAXP processing limits that a very large dump could exceed
+        // (TODO confirm). It also relaxes the parser's built-in protections, so this program
+        // should only be pointed at input that is trusted.
+        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, false);
+        // parse(String, ...) treats its argument as a URI, so a backslash-separated path string is
+        // not portable; build the path from segments and hand the parser a File instead.
+        java.io.File input = args.length > 0 ? new java.io.File(args[0]) : java.nio.file.Paths.get(
+                "..", "data", "raw", "stackoverflow.com-Posts", "posts.xml").toFile();
+        SAXParser saxParser = factory.newSAXParser();
+        saxParser.parse(input, new PostsHandler());
+        System.out.println("SAX parse took " + (System.currentTimeMillis() - start) + "ms.");
+    }
+}
diff --git a/data-collection/SQLiteSession.java b/data-collection/SQLiteSession.java
new file mode 100644
index 0000000000000000000000000000000000000000..313845e45c197ce0b8e2f471fa17e9628adeb6f6
--- /dev/null
+++ b/data-collection/SQLiteSession.java
@@ -0,0 +1,40 @@
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.core.config.Configurator;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+/** Thin wrapper around a single JDBC connection; SQL failures are logged rather than thrown. */
+public class SQLiteSession {
+    private static final Logger logger = LogManager.getLogger(SQLiteSession.class);
+    private Connection conn;
+
+    /** Opens a connection to {@code url}; on failure the error is logged and no connection is held. */
+    public SQLiteSession(String url) {
+        Configurator.setLevel(logger.getName(), Level.INFO);
+        try {
+            conn = DriverManager.getConnection(url);
+            logger.info("Connection to SQLite has been established.");
+        } catch (SQLException e) {
+            logger.error("Could not open a connection to {}", url, e);
+        }
+    }
+    /** Closes the connection if one was opened; a failure to close is logged with its cause. */
+    public void endSession() {
+        try {
+            if (conn != null) {
+                conn.close();
+            }
+        } catch (SQLException ex) {
+            logger.error("Could not close the SQLite connection", ex);
+        }
+    }
+    /** Opens and closes one session; {@code args[0]} may override the machine-specific default URL. */
+    public static void main(String[] args) {
+        new SQLiteSession(args.length > 0 ? args[0]
+                : "jdbc:sqlite:C:\\Users\\lhb1g20\\OneDrive - University of Southampton\\graph4stackoverflow\\data-collection\\stackoverflow.db").endSession();
+    }
+}
\ No newline at end of file
diff --git a/data-collection/TagsHandler.java b/data-collection/TagsHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..02188aee3c67ec56ecaecb1bd073d6e41ad2ba9b
--- /dev/null
+++ b/data-collection/TagsHandler.java
@@ -0,0 +1,2 @@
+public class TagsHandler { // Empty placeholder with no members; presumably reserved for a Tags dump handler - TODO implement.
+}
diff --git a/data-collection/UsersHandler.java b/data-collection/UsersHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..2714e4b21bc388806be3b1104d12af14becd56f7
--- /dev/null
+++ b/data-collection/UsersHandler.java
@@ -0,0 +1,2 @@
+public class UsersHandler { // Empty placeholder with no members; presumably reserved for a Users dump handler - TODO implement.
+}
diff --git a/data-collection/VotesHandler.java b/data-collection/VotesHandler.java
new file mode 100644
index 0000000000000000000000000000000000000000..89a990d8762b81ff13503166fede911fc4a4bf4f
--- /dev/null
+++ b/data-collection/VotesHandler.java
@@ -0,0 +1,2 @@
+public class VotesHandler { // Empty placeholder with no members; presumably reserved for a Votes dump handler - TODO implement.
+}
diff --git a/data-collection/sqlite-jdbc-3.39.3.0.jar b/data-collection/sqlite-jdbc-3.39.3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..23ab07358494cdcad77722f4b08748c1442b512a
Binary files /dev/null and b/data-collection/sqlite-jdbc-3.39.3.0.jar differ
diff --git a/data-collection/stackoverflow.db b/data-collection/stackoverflow.db
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391