Skip to content
Snippets Groups Projects
Commit 381740e6 authored by Liam Byrne
Browse files

SAX Parser implemented

parent 8d4438ca
No related branches found
No related tags found
No related merge requests found
.idea .idea
data/
data-collection/out/
\ No newline at end of file
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
/**
 * Immutable holder for the six string values of one badge row.
 *
 * <p>Values are stored exactly as passed to the constructor; no validation or
 * conversion is performed, and {@code null} is accepted for any of them.
 */
class BadgeRow {
    private final String id;
    private final String userId;
    private final String name;
    private final String date;
    private final String badgeClass;
    private final String tagBased;

    /**
     * Creates a row from its raw string values.
     *
     * @param id         value reported by {@link #getId()}
     * @param userId     value reported by {@link #getUserId()}
     * @param name       value reported by {@link #getName()}
     * @param date       value reported by {@link #getDate()}
     * @param badgeClass value reported by {@link #getBadgeClass()}
     * @param tagBased   value reported by {@link #getTagBased()}
     */
    public BadgeRow(String id, String userId, String name, String date, String badgeClass, String tagBased) {
        this.id = id;
        this.userId = userId;
        this.name = name;
        this.date = date;
        this.badgeClass = badgeClass;
        this.tagBased = tagBased;
    }

    /** @return the id passed to the constructor */
    public String getId() {
        return id;
    }

    /** @return the user id passed to the constructor */
    public String getUserId() {
        return userId;
    }

    /** @return the name passed to the constructor */
    public String getName() {
        return name;
    }

    /** @return the date passed to the constructor, as an unparsed string */
    public String getDate() {
        return date;
    }

    /** @return the badge class passed to the constructor */
    public String getBadgeClass() {
        return badgeClass;
    }

    /** @return the tag-based flag passed to the constructor, as an unparsed string */
    public String getTagBased() {
        return tagBased;
    }

    /**
     * Renders every value in single quotes; a {@code null} value appears as the text {@code null}.
     * The labels keep their original capitalised spelling so existing output is unchanged.
     */
    @Override
    public String toString() {
        return "BadgeRow{" +
                "Id='" + id + '\'' +
                ", UserId='" + userId + '\'' +
                ", Name='" + name + '\'' +
                ", Date='" + date + '\'' +
                ", BadgeClass='" + badgeClass + '\'' +
                ", TagBased='" + tagBased + '\'' +
                '}';
    }
}
/**
 * SAX handler for a badges XML document: a {@code badges} element containing
 * {@code row} elements whose attributes carry the data.
 *
 * <p>Each {@code row} is converted to a {@link BadgeRow}; any element other than
 * {@code row} or {@code badges} aborts the parse.
 */
public class BadgesHandler extends DefaultHandler {
    private static final String ROW = "row";
    private static final String BADGES = "badges";

    /**
     * Builds a {@link BadgeRow} for every {@code row} element and rejects unknown elements.
     *
     * @param uri   namespace URI (unused)
     * @param lName local name (unused)
     * @param qName qualified element name used for dispatch
     * @param attr  attributes of the element
     * @throws SAXException if the element is neither {@code row} nor {@code badges}
     */
    @Override
    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
        if (ROW.equals(qName)) {
            // The row is materialised but nothing consumes it in this revision.
            toBadgeRow(attr);
        } else if (!BADGES.equals(qName)) {
            throw new SAXException(String.format("Unknown tag %s", qName));
        }
    }

    /** Maps the attributes of one {@code row} element onto a {@link BadgeRow}. */
    private static BadgeRow toBadgeRow(Attributes attr) {
        return new BadgeRow(
                attr.getValue("Id"),
                attr.getValue("UserId"),
                attr.getValue("Name"),
                attr.getValue("Date"),
                attr.getValue("Class"),
                attr.getValue("TagBased")
        );
    }

    /**
     * Parses the badges file and prints the elapsed time.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     *             {@code ../data/raw/stackoverflow.com-Badges/badges.xml}
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException                 if the document is malformed or contains an unknown element
     * @throws IOException                  if the input cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        final long start = System.nanoTime();
        // Build the path from segments so it resolves on any platform, and hand the parser a
        // File rather than a String: the String overload expects a URI, not a file-system path.
        final java.nio.file.Path input = args.length > 0
                ? java.nio.file.Paths.get(args[0])
                : java.nio.file.Paths.get("..", "data", "raw", "stackoverflow.com-Badges", "badges.xml");
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();
        BadgesHandler handler = new BadgesHandler();
        saxParser.parse(input.toFile(), handler);
        final long durationInMilliseconds = (System.nanoTime() - start) / 1_000_000L;
        System.out.println("SAX parse took " + durationInMilliseconds + "ms.");
    }
}
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Holder for the seven string values of one comment row.
 *
 * <p>Values are kept exactly as supplied; nothing is validated or converted.
 */
class CommentRow {
    private String id;
    private String postId;
    private String score;
    private String text;
    private String creationDate;
    private String userId;
    private String contentLicense;

    /**
     * Stores the given values unchanged.
     *
     * @param id             value reported by {@link #getId()}
     * @param postId         value reported by {@link #getPostId()}
     * @param score          value reported by {@link #getScore()}
     * @param text           value reported by {@link #getText()}
     * @param creationDate   value reported by {@link #getCreationDate()}
     * @param userId         value reported by {@link #getUserId()}
     * @param contentLicense value reported by {@link #getContentLicense()}
     */
    public CommentRow(String id, String postId, String score, String text,
                      String creationDate, String userId, String contentLicense) {
        this.id = id;
        this.postId = postId;
        this.score = score;
        this.text = text;
        this.creationDate = creationDate;
        this.userId = userId;
        this.contentLicense = contentLicense;
    }

    /** @return the id given at construction */
    public String getId() {
        return this.id;
    }

    /** @return the post id given at construction */
    public String getPostId() {
        return this.postId;
    }

    /** @return the score given at construction, as an unparsed string */
    public String getScore() {
        return this.score;
    }

    /** @return the comment text given at construction */
    public String getText() {
        return this.text;
    }

    /** @return the creation date given at construction, as an unparsed string */
    public String getCreationDate() {
        return this.creationDate;
    }

    /** @return the user id given at construction */
    public String getUserId() {
        return this.userId;
    }

    /** @return the content license given at construction */
    public String getContentLicense() {
        return this.contentLicense;
    }

    /** Lists every value in single quotes; a {@code null} value is shown as the text {@code null}. */
    @Override
    public String toString() {
        return String.format(
                "CommentRow{id='%s', postId='%s', score='%s', text='%s', creationDate='%s', userId='%s', contentLicense='%s'}",
                id, postId, score, text, creationDate, userId, contentLicense);
    }
}
/**
 * SAX handler for a comments XML document: a {@code comments} element containing
 * {@code row} elements whose attributes carry the data.
 *
 * <p>Each {@code row} is converted to a {@link CommentRow} and printed to standard output;
 * any element other than {@code row} or {@code comments} aborts the parse.
 */
public class CommentsHandler extends DefaultHandler {
    private static final String ROW = "row";
    private static final String COMMENTS = "comments";

    /**
     * Prints a {@link CommentRow} for every {@code row} element and rejects unknown elements.
     *
     * @param uri   namespace URI (unused)
     * @param lName local name (unused)
     * @param qName qualified element name used for dispatch
     * @param attr  attributes of the element
     * @throws SAXException if the element is neither {@code row} nor {@code comments}
     */
    @Override
    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
        if (ROW.equals(qName)) {
            System.out.println(toCommentRow(attr));
        } else if (!COMMENTS.equals(qName)) {
            throw new SAXException(String.format("Unknown tag %s", qName));
        }
    }

    /** Maps the attributes of one {@code row} element onto a {@link CommentRow}. */
    private static CommentRow toCommentRow(Attributes attr) {
        return new CommentRow(
                attr.getValue("Id"),
                attr.getValue("PostId"),
                attr.getValue("Score"),
                attr.getValue("Text"),
                attr.getValue("CreationDate"),
                attr.getValue("UserId"),
                attr.getValue("ContentLicense")
        );
    }

    /**
     * Parses the comments file and prints the elapsed time.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     *             {@code ../data/raw/stackoverflow.com-Comments/comments.xml}
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException                 if the document is malformed or contains an unknown element
     * @throws IOException                  if the input cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        final long start = System.nanoTime();
        // Build the path from segments so it resolves on any platform, and hand the parser a
        // File rather than a String: the String overload expects a URI, not a file-system path.
        final java.nio.file.Path input = args.length > 0
                ? java.nio.file.Paths.get(args[0])
                : java.nio.file.Paths.get("..", "data", "raw", "stackoverflow.com-Comments", "comments.xml");
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser saxParser = factory.newSAXParser();
        CommentsHandler handler = new CommentsHandler();
        saxParser.parse(input.toFile(), handler);
        final long durationInMilliseconds = (System.nanoTime() - start) / 1_000_000L;
        System.out.println("SAX parse took " + durationInMilliseconds + "ms.");
    }
}
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
/**
 * Immutable holder for the eighteen string values of one post row.
 *
 * <p>Values are stored exactly as passed to the constructor; no validation or
 * conversion is performed, and {@code null} is accepted for any of them.
 */
class PostRow {
    private final String id;
    private final String postTypeId;
    private final String acceptedAnswerId;
    private final String creationDate;
    private final String score;
    private final String viewCount;
    private final String body;
    private final String ownerUserId;
    private final String lastEditorUserId;
    private final String lastEditorDisplayName;
    private final String lastEditDate;
    private final String title;
    private final String tags;
    private final String answerCount;
    private final String commentCount;
    private final String favoriteCount;
    private final String communityOwnedDate;
    private final String contentLicense;

    /**
     * Creates a row from its raw string values. Each argument is stored unchanged and
     * returned by the getter of the same name.
     */
    public PostRow(String id, String postTypeId, String acceptedAnswerId,
                   String creationDate, String score, String viewCount,
                   String body, String ownerUserId, String lastEditorUserId,
                   String lastEditorDisplayName, String lastEditDate,
                   String title, String tags, String answerCount,
                   String commentCount, String favoriteCount,
                   String communityOwnedDate, String contentLicense) {
        this.id = id;
        this.postTypeId = postTypeId;
        this.acceptedAnswerId = acceptedAnswerId;
        this.creationDate = creationDate;
        this.score = score;
        this.viewCount = viewCount;
        this.body = body;
        this.ownerUserId = ownerUserId;
        this.lastEditorUserId = lastEditorUserId;
        this.lastEditorDisplayName = lastEditorDisplayName;
        this.lastEditDate = lastEditDate;
        this.title = title;
        this.tags = tags;
        this.answerCount = answerCount;
        this.commentCount = commentCount;
        this.favoriteCount = favoriteCount;
        this.communityOwnedDate = communityOwnedDate;
        this.contentLicense = contentLicense;
    }

    /**
     * Renders every value in single quotes; a {@code null} value appears as the text {@code null}.
     * The labels keep their original capitalised spelling so existing output is unchanged.
     */
    @Override
    public String toString() {
        return "PostRow{" +
                "Id='" + id + '\'' +
                ", PostTypeId='" + postTypeId + '\'' +
                ", AcceptedAnswerId='" + acceptedAnswerId + '\'' +
                ", CreationDate='" + creationDate + '\'' +
                ", Score='" + score + '\'' +
                ", ViewCount='" + viewCount + '\'' +
                ", Body='" + body + '\'' +
                ", OwnerUserId='" + ownerUserId + '\'' +
                ", LastEditorUserId='" + lastEditorUserId + '\'' +
                ", LastEditorDisplayName='" + lastEditorDisplayName + '\'' +
                ", LastEditDate='" + lastEditDate + '\'' +
                ", Title='" + title + '\'' +
                ", Tags='" + tags + '\'' +
                ", AnswerCount='" + answerCount + '\'' +
                ", CommentCount='" + commentCount + '\'' +
                ", FavoriteCount='" + favoriteCount + '\'' +
                ", CommunityOwnedDate='" + communityOwnedDate + '\'' +
                ", ContentLicense='" + contentLicense + '\'' +
                '}';
    }

    /** @return the id passed to the constructor */
    public String getId() {
        return id;
    }

    /** @return the post type id passed to the constructor */
    public String getPostTypeId() {
        return postTypeId;
    }

    /** @return the accepted answer id passed to the constructor */
    public String getAcceptedAnswerId() {
        return acceptedAnswerId;
    }

    /** @return the creation date passed to the constructor, as an unparsed string */
    public String getCreationDate() {
        return creationDate;
    }

    /** @return the score passed to the constructor, as an unparsed string */
    public String getScore() {
        return score;
    }

    /** @return the view count passed to the constructor, as an unparsed string */
    public String getViewCount() {
        return viewCount;
    }

    /** @return the body passed to the constructor */
    public String getBody() {
        return body;
    }

    /** @return the owner user id passed to the constructor */
    public String getOwnerUserId() {
        return ownerUserId;
    }

    /** @return the last editor user id passed to the constructor */
    public String getLastEditorUserId() {
        return lastEditorUserId;
    }

    /** @return the last editor display name passed to the constructor */
    public String getLastEditorDisplayName() {
        return lastEditorDisplayName;
    }

    /** @return the last edit date passed to the constructor, as an unparsed string */
    public String getLastEditDate() {
        return lastEditDate;
    }

    /** @return the title passed to the constructor */
    public String getTitle() {
        return title;
    }

    /** @return the tags passed to the constructor, as a single unparsed string */
    public String getTags() {
        return tags;
    }

    /** @return the answer count passed to the constructor, as an unparsed string */
    public String getAnswerCount() {
        return answerCount;
    }

    /** @return the comment count passed to the constructor, as an unparsed string */
    public String getCommentCount() {
        return commentCount;
    }

    /** @return the favorite count passed to the constructor, as an unparsed string */
    public String getFavoriteCount() {
        return favoriteCount;
    }

    /** @return the community-owned date passed to the constructor, as an unparsed string */
    public String getCommunityOwnedDate() {
        return communityOwnedDate;
    }

    /** @return the content license passed to the constructor */
    public String getContentLicense() {
        return contentLicense;
    }
}
/**
 * SAX handler for a posts XML document: a {@code posts} element containing
 * {@code row} elements whose attributes carry the data.
 *
 * <p>Each {@code row} is converted to a {@link PostRow} and printed to standard output;
 * any element other than {@code row} or {@code posts} aborts the parse.
 */
public class PostsHandler extends DefaultHandler {
    private static final String ROW = "row";
    private static final String POSTS = "posts";

    /**
     * Prints a {@link PostRow} for every {@code row} element and rejects unknown elements.
     *
     * @param uri   namespace URI (unused)
     * @param lName local name (unused)
     * @param qName qualified element name used for dispatch
     * @param attr  attributes of the element
     * @throws SAXException if the element is neither {@code row} nor {@code posts}
     */
    @Override
    public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException {
        if (ROW.equals(qName)) {
            System.out.println(toPostRow(attr));
        } else if (!POSTS.equals(qName)) {
            throw new SAXException(String.format("Unknown tag %s", qName));
        }
    }

    /** Maps the attributes of one {@code row} element onto a {@link PostRow}. */
    private static PostRow toPostRow(Attributes attr) {
        return new PostRow(
                attr.getValue("Id"),
                attr.getValue("PostTypeId"),
                attr.getValue("AcceptedAnswerId"),
                attr.getValue("CreationDate"),
                attr.getValue("Score"),
                attr.getValue("ViewCount"),
                attr.getValue("Body"),
                attr.getValue("OwnerUserId"),
                attr.getValue("LastEditorUserId"),
                attr.getValue("LastEditorDisplayName"),
                attr.getValue("LastEditDate"),
                attr.getValue("Title"),
                attr.getValue("Tags"),
                attr.getValue("AnswerCount"),
                attr.getValue("CommentCount"),
                attr.getValue("FavoriteCount"),
                attr.getValue("CommunityOwnedDate"),
                attr.getValue("ContentLicense")
        );
    }

    /**
     * Parses the posts file and prints the elapsed time.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     *             {@code ../data/raw/stackoverflow.com-Posts/posts.xml}
     * @throws ParserConfigurationException if a SAX parser cannot be created or configured
     * @throws SAXException                 if the document is malformed or contains an unknown element
     * @throws IOException                  if the input cannot be read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        final long start = System.nanoTime();
        // Build the path from segments so it resolves on any platform, and hand the parser a
        // File rather than a String: the String overload expects a URI, not a file-system path.
        final java.nio.file.Path input = args.length > 0
                ? java.nio.file.Paths.get(args[0])
                : java.nio.file.Paths.get("..", "data", "raw", "stackoverflow.com-Posts", "posts.xml");
        SAXParserFactory factory = SAXParserFactory.newInstance();
        // NOTE(review): secure processing is switched off explicitly, presumably so the JDK's
        // processing limits do not abort the parse of a very large input - confirm. With the
        // limits off, only run this on trusted files.
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, false);
        SAXParser saxParser = factory.newSAXParser();
        PostsHandler handler = new PostsHandler();
        saxParser.parse(input.toFile(), handler);
        final long durationInMilliseconds = (System.nanoTime() - start) / 1_000_000L;
        System.out.println("SAX parse took " + durationInMilliseconds + "ms.");
    }
}
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.config.Configurator;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * Wrapper around a single JDBC {@link Connection} opened from a URL.
 *
 * <p>The connection is opened in the constructor and released by {@link #endSession()}
 * (or {@link #close()}, so the session can be used in a try-with-resources statement).
 * Connection errors are logged, not thrown.
 */
public class SQLiteSession implements AutoCloseable {
    private static final Logger logger = LogManager.getLogger(SQLiteSession.class);
    private Connection conn;

    /**
     * Opens a connection to the given JDBC URL and sets this class's logger to INFO.
     *
     * <p>If the connection cannot be opened the failure is logged and the session is left
     * without a connection; no exception is propagated.
     *
     * @param url JDBC URL handed to {@link DriverManager#getConnection(String)}
     */
    public SQLiteSession(String url) {
        Configurator.setLevel(logger.getName(), Level.INFO);
        try {
            // create a connection to the database
            conn = DriverManager.getConnection(url);
            logger.info("Connection to SQLite has been established.");
        } catch (SQLException e) {
            // Pass the exception itself so the stack trace and cause are not lost.
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Closes the connection if one is open. Safe to call more than once: the reference is
     * cleared after the first call, so later calls do nothing. Close failures are logged.
     */
    public void endSession() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (SQLException ex) {
            logger.error(ex.getMessage(), ex);
        } finally {
            conn = null;
        }
    }

    /** Equivalent to {@link #endSession()}; present so the session works with try-with-resources. */
    @Override
    public void close() {
        endSession();
    }

    /**
     * Opens a session and immediately closes it again.
     *
     * @param args optional; {@code args[0]} overrides the built-in JDBC URL
     */
    public static void main(String[] args) {
        final String url = args.length > 0
                ? args[0]
                : "jdbc:sqlite:C:\\Users\\lhb1g20\\OneDrive - University of Southampton\\graph4stackoverflow\\data-collection\\stackoverflow.db";
        try (SQLiteSession s = new SQLiteSession(url)) {
            // Nothing to do: the session is closed when this block exits.
        }
    }
}
\ No newline at end of file
/** Empty placeholder class; it currently declares no members. */
public class TagsHandler {
}
/** Empty placeholder class; it currently declares no members. */
public class UsersHandler {
}
/** Empty placeholder class; it currently declares no members. */
public class VotesHandler {
}
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment