Convert events.xml and news.xml data to MDX #4

Open
j285he wants to merge 44 commits from mdx-conversion into master
10 changed files with 3069 additions and 6 deletions

5
.gitignore vendored
View File

@ -1,2 +1,7 @@
/common.mk
/build
/scripts/mdx-scripts/node_modules
/scripts/mdx-scripts/markdown-events
/scripts/mdx-scripts/markdown-news
/scripts/mdx-scripts/markdown-talks

View File

@ -4053,7 +4053,7 @@ fascinating problem.
<!-- Fall 2013 -->
<eventitem date="2013-11-23" time="TBD" room="Toronto, ON"
<eventitem date="2013-11-23" time="00:00 AM" room="Toronto, ON"
title="CSC Goes to Toronto Erlang Factory Lite 2013">
<short><p>
The CSC has been invited to attend this Erlang conference in Toronto. If
@ -4577,7 +4577,7 @@ title="The Future of 3D Graphics is in Software!">
</p></abstract>
</eventitem> -->
<eventitem date="2013-06-07" time="6:00 PM, 8:00PM" room="Comfy Lounge" title="Unix 101/ Code Party 0">
<eventitem date="2013-06-07" time="6:00 PM" room="Comfy Lounge" title="Unix 101/ Code Party 0">
<short><p>
We are offering a Unix tutorial on Friday, June 7th, 2013! Following the tutorial a code party will take place.
Bring your laptops and chargers for an awesome night of coding, hacking and learning.
@ -5164,7 +5164,7 @@ in the Firefox engine.
</short>
</eventitem>
<eventitem date="2011-07-09" time="4 PM to 10PM" room="Columbia Lake Firepit"
<eventitem date="2011-07-09" time="4 PM" room="Columbia Lake Firepit"
title="CSC Goes Outside">
<short> <p> Do you like going outside? Are you
vitamin-D deficient from being in the MC too long? Do you think
@ -6118,7 +6118,7 @@ for Unix 103 and 104 that get much more in depth into power programming tools on
</p></abstract>
</eventitem>
<eventitem date="2010-01-18" time="15:30 PM" room="MC2066" title="Wilderness Programming">
<eventitem date="2010-01-18" time="3:30 PM" room="MC2066" title="Wilderness Programming">
<short><p>Paul Lutus describes his early Apple II software development days, conducted from the far end of a 1200-foot power cord, in a tiny Oregon cabin. Paul describes how he wrote a best-seller (Apple Writer) in assembly language, while dealing with power outages, lightning storms and the occasional curious bear.
</p></short>
@ -9877,14 +9877,14 @@ Remember: Monday, January 13, 6:00 PM, MC3001/Comfy Lounge.</p>
<short>The teeth of Free Software</short>
<abstract>
<div style="font-style: italic"><blockquote>
<div><blockquote>
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General
Public License is intended to guarantee your freedom to share and
change free software---to make sure the software is free for all
its users.
<br />
<div style="text-align:right">--- Excerpt from the GNU GPL</div>
<div>--- Excerpt from the GNU GPL</div>
</blockquote></div>
<p> The GNU General Public License is one of the most influential

View File

@ -0,0 +1,111 @@
const fs = require("fs");
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const { window } = new JSDOM("");
global.window = window;
var showdown = require("showdown");
const converterShowdown = new showdown.Converter();
const libxmljs = require("libxmljs");
const getTerm = require("./getTerm.js");
// Load the events XML asynchronously. On success the DOCTYPE declaration is
// stripped and the remaining markup is handed to parseXML; on failure the
// error is logged and nothing is converted.
fs.readFile("../../events.xml", "utf8", (readError, contents) => {
  if (readError) {
    console.error(readError);
    return;
  }
  const withoutDoctype = contents.replace(/<!DOCTYPE.*>/, "");
  parseXML(withoutDoctype);
});

// Create the output root directory; a failure is rethrown from the callback.
fs.mkdir("./markdown-events/", { recursive: true }, (mkdirError) => {
  if (mkdirError) {
    throw mkdirError;
  }
});
/**
 * Builds a Date from an event's date attribute and its free-form time
 * attribute, normalising the time text before handing it to the Date parser.
 *
 * @param {string} dateStr - Date portion, e.g. "2016-12-05".
 * @param {string} timeStr - Time portion, e.g. "6:00 PM", "7pm" or "7pm-9pm".
 * @returns {Date} The parsed date, shifted forward by one hour.
 */
const parseTime = (dateStr, timeStr) => {
  // For a range such as "7pm - 9pm", only the start of the range is used.
  const startOnly = /-/.test(timeStr) ? timeStr.split("-")[0] : timeStr;

  // Upper-case the meridiem: "7pm" -> "7PM".
  let normalized = startOnly.toUpperCase();

  // Put a space between the digits and the meridiem: "7PM" -> "7 PM".
  if (/[0-9](a|A|p|P)/g.test(normalized)) {
    normalized = normalized.replace(/[0-9](?=(a|A|p|P))/g, "$& ");
  }

  // Supply minutes when they are missing: "7 PM" -> "7:00 PM".
  if (/(?<!:[0-9])[0-9] (a|A|p|P)/g.test(normalized)) {
    normalized = normalized.replace(/[0-9](?= )/, "$&:00");
  }

  // Original intent: "convert from CST to EST".
  // NOTE(review): the string is parsed in the machine's local zone, so this
  // presumably assumes the script runs on a Central-time machine - confirm.
  const OFFSET_FROM_EST = +1;
  const parsed = new Date(`${dateStr} ${normalized}`);
  parsed.setHours(parsed.getHours() + OFFSET_FROM_EST);
  return parsed;
};
/**
 * Concatenates the string form of every node in the list, in order.
 *
 * @param {Array} nodeChildren - Nodes exposing `type()` and `toString()`.
 * @returns {string} The joined serialisation; "" for an empty list.
 */
const nodeChildrenToString = (nodeChildren) =>
  Array.from(nodeChildren).reduce(
    // Text nodes are appended via implicit string conversion; every other
    // node type is serialised explicitly with toString().
    (serialized, child) =>
      serialized + (child.type() === "text" ? child : child.toString()),
    ""
  );
/**
 * Converts every <eventitem> element under <eventdefs> into a Markdown file
 * with YAML front matter (name, short, date, online, location) followed by the
 * event abstract. Files are written synchronously to
 * ./markdown-events/<year>/<term>/<title>.md.
 *
 * @param {string} XML - Markup of the events file (parsed with the HTML parser).
 * @throws If a required attribute or the <short> child is missing, or if a
 *   directory or file cannot be written.
 */
const parseXML = (XML) => {
  // Escapes a value for a single-quoted YAML scalar by doubling single quotes.
  const quoteForYaml = (value) => value.replace(/'/g, "''");

  const xmlDoc = libxmljs.parseHtml(XML);
  const eventdefsChildren = xmlDoc.get("//eventdefs").childNodes();

  eventdefsChildren.forEach((eventItem) => {
    // Only element nodes are events; anything else between them is skipped.
    if (eventItem.type() !== "element") {
      return;
    }

    const title = eventItem.attr("title").value();
    const shortNodes = eventItem.get(".//short").childNodes();
    const short = converterShowdown.makeMarkdown(
      nodeChildrenToString(shortNodes)
    );

    const dateStr = eventItem.attr("date").value();
    const timeStr = eventItem.attr("time").value();
    const date = parseTime(dateStr, timeStr);
    // Declared per event so that no global variable is created.
    const currentTerm = getTerm(date);

    const location = eventItem.attr("room").value();
    const online = location.toLowerCase() === "online";

    // The short description doubles as the body when there is no <abstract>.
    const abstractNode = eventItem.get(".//abstract");
    const abstract =
      abstractNode !== undefined
        ? converterShowdown.makeMarkdown(
            nodeChildrenToString(abstractNode.childNodes())
          )
        : short;

    const md = `---
name: '${quoteForYaml(title)}'
short: '${quoteForYaml(short.replace(/\n/g, ""))}'
date: '${quoteForYaml(date.toString())}'
online: ${online}
location: '${quoteForYaml(location)}'
---
${abstract.replace(/<br>/g, "\n")}`;

    // remove invalid characters from filenames
    const mdTitle = title
      .replace(/[\\\\/:*?\"<>|]/g, "")
      .replace(/(\s+)/g, "-");

    // NOTE(review): the path depends only on year, term and title, so two
    // events with the same title in one term end up in the same file and the
    // later write replaces the earlier one - confirm whether that is intended.
    const outputDir = `./markdown-events/${currentTerm.year}/${currentTerm.term}`;

    // The synchronous fs calls throw on failure; they take no callback.
    fs.mkdirSync(outputDir, { recursive: true });
    fs.writeFileSync(`${outputDir}/${mdTitle}.md`, md);
  });
};

View File

@ -0,0 +1,104 @@
<?xml version='1.0'?>
<!DOCTYPE eventdefs SYSTEM "csc.dtd" [<!ENTITY mdash "&#x2014;">]>
<eventdefs>
<!-- Fall 2016 -->
<eventitem date="2016-12-05" time="6:00 PM" room="MC Comfy" title="CSC/PMC EOT Party">
<short>
<p>
The CSC and the PMAMC&amp;OC (aka pure math club) are hosting our end
of term events together! We'll be taking over MC Comfy to hang out,
eat lots of food (from Kismet!), and play board games.
</p>
</short>
<abstract>
<p>
The CSC and the PMAMC&amp;OC (aka pure math club) are hosting our end
of term events together! We'll be taking over MC Comfy to hang out,
eat lots of food (from Kismet!), and play board games.
</p>
</abstract>
</eventitem>
<eventitem date="2016-11-21" time="6:15 PM" room="MC 4063" title="Richard Mann Prof Talk">
<short>
<p>
Professor Richard Mann will be giving a talk, titled "Open Source
Software for Sound Measurement and Analysis". He will be presenting
information about his new course, CS 489, Computational Sound, which
will be running in Winter 2017.
</p>
</short>
<abstract>
<p>
Professor Richard Mann will be giving a talk, titled "Open Source
Software for Sound Measurement and Analysis". He will be presenting
information about his new course, CS 489, Computational Sound, which
will be running in Winter 2017. The abstract for this talk is below.
<br/><br/>
</p>
<p>
The most common problem in acoustics is to measure the frequency
response of an (expensive!) listening room. While specifications
exist for the amplifiers, speakers, etc, each system must be still
evaluated individually, since the frequency response depends on the
direct sound from the speaker(s), the listener position and the
reverberation of the room. The user may spend considerable time
adjusting the speaker placement, the system equalization, and
possibly treating the room to get the best response.
</p>
<p>
There are several commercial and freeware applications for this task,
some of which are very good. However, to learn the methods the user
must understand the processing involved.
</p>
<p>
The purpose of this talk is to present an open source solution. Our
system is based on a very few lines of code, written in GNU Octave, a
Matlab(r) workalike that runs under Linux, Windows and Mac.
</p>
<p>
The program works by playing a known test signal, such as a tone or
some kind of noise source, out of the sound card into the system. The
system is measured by comparing the driving signal to that measured by a
microphone in the room. The frequency response is computed using the
Discrete Fourier Transform (DFT).
</p>
<p>
This is joint work with Prof. John Vanderkooy, Physics, University of
Waterloo.
</p>
</abstract>
</eventitem>
<eventitem date="2016-11-16" time="8:30 PM" room="M3 1006" title="General Meeting">
<short>
<p>
This general meeting will be held to discuss changes to our Code of
Conduct.
</p>
</short>
<abstract>
<p> The Code of Conduct and the amended version can be found below: </p>
<ul>
<li><a href="https://www.csclub.uwaterloo.ca/~exec/proposed-amendment/about/code-of-conduct">Proposed CoC</a></li>
<li><a href="https://www.csclub.uwaterloo.ca/~exec/proposed-amendment.patch">Diff between current and proposed CoC</a></li>
</ul>
</abstract>
</eventitem>
<eventitem date="2016-11-16" time="6:30 pm" room="M3 1006" title="Code Party">
<short>
<p>
Come code with us, eat some food, do some things.
Personal projects you want to work on? Homework
projects you need to finish? Or want some time to explore
some new technology and chat about it? You can join us at Code Party
and do it, with great company and great food.
</p>
</short>
</eventitem>
</eventdefs>

View File

@ -0,0 +1,22 @@
/**
 * Determines the academic term a date falls in, based on its UTC month:
 * January-April is "winter", May-August is "spring" and September-December
 * is "fall".
 *
 * @param {string | number | Date} dateStr - Any value accepted by the Date
 *   constructor.
 * @returns {{ year: number, term: string }} The UTC year and the term name.
 *   For an unparseable input, `year` is NaN and `term` is "".
 */
const getTerm = (dateStr) => {
  const TERMS = ["winter", "spring", "fall"];
  const date = new Date(dateStr);
  const year = date.getUTCFullYear();
  // Classify by month instead of comparing against midnight of each term's
  // last day, so instants later on April 30, August 31 and December 31 still
  // belong to their term. An invalid date yields NaN here and falls back to "".
  const term = TERMS[Math.floor(date.getUTCMonth() / 4)] ?? "";
  return { year, term };
};

module.exports = getTerm;

View File

@ -0,0 +1,79 @@
const fs = require("fs");
const crypto = require('crypto')
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const { window } = new JSDOM("");
global.window = window;
var showdown = require("showdown");
const converterShowdown = new showdown.Converter();
const libxmljs = require("libxmljs");
const getTerm = require("./getTerm.js");
// Load the news XML asynchronously. On success the DOCTYPE declaration is
// stripped and the remaining markup is handed to parseXML; on failure the
// error is logged and nothing is converted.
fs.readFile("../../news.xml", "utf8", (readError, contents) => {
  if (readError) {
    console.error(readError);
    return;
  }
  const withoutDoctype = contents.replace(/<!DOCTYPE.*>/, "");
  parseXML(withoutDoctype);
});

// Create the output root directory; a failure is rethrown from the callback.
fs.mkdir("./markdown-news/", { recursive: true }, (mkdirError) => {
  if (mkdirError) {
    throw mkdirError;
  }
});
/**
 * Concatenates the string form of every node in the list, in order, and
 * rewrites <tt> tags as <code> tags in the result.
 *
 * @param {Array} nodeChildren - Nodes exposing `type()` and `toString()`.
 * @returns {string} The joined serialisation with <tt> replaced by <code>.
 */
const nodeChildrenToString = (nodeChildren) => {
  // Text nodes are appended via implicit string conversion; every other node
  // type is serialised explicitly with toString().
  const joined = Array.from(nodeChildren).reduce(
    (serialized, child) =>
      serialized + (child.type() === "text" ? child : child.toString()),
    ""
  );

  // Replace <tt> with <code>
  const withOpeningTags = joined.replaceAll("<tt>", "<code>");
  return withOpeningTags.replaceAll("</tt>", "</code>");
};
/**
 * Converts every <newsitem> element under <newsdefs> into a Markdown file with
 * YAML front matter (author, date) followed by the item's content. Files are
 * written synchronously to
 * ./markdown-news/<year>/<term>/<date>-<author>-<suffix>.md.
 *
 * @param {string} XML - Markup of the news file (parsed with the HTML parser).
 * @throws If a required attribute is missing, or if a directory or file cannot
 *   be written.
 */
const parseXML = (XML) => {
  const xmlDoc = libxmljs.parseHtml(XML);
  const newsdefsChildren = xmlDoc.get("//newsdefs").childNodes();

  newsdefsChildren.forEach((newsItem) => {
    // Only element nodes are news items; anything else between them is skipped.
    if (newsItem.type() !== "element") {
      return;
    }

    const author = newsItem.attr("author").value();
    const date = newsItem.attr("date").value();
    // The date attribute carries no time zone; " EST" is appended before parsing.
    const dateWithZone = `${date} EST`;
    const currentTerm = getTerm(dateWithZone);

    const content = converterShowdown
      .makeMarkdown(nodeChildrenToString(newsItem.childNodes()))
      .trim();

    const md = `---
author: '${author.replace(/'/g, "''")}'
date: '${new Date(dateWithZone).toString()}'
---
${content}`;

    const outputDir = `./markdown-news/${currentTerm.year}/${currentTerm.term}`;
    // The synchronous fs calls throw on failure; they take no callback.
    fs.mkdirSync(outputDir, { recursive: true });

    // Several items can share a date and author, so a short suffix keeps their
    // file names apart. It is derived from the content rather than from a
    // random value so that running the conversion again rewrites the same
    // files instead of adding duplicates.
    const suffix = crypto
      .createHash("sha256")
      .update(content)
      .digest("hex")
      .slice(0, 6);

    fs.writeFileSync(`${outputDir}/${date}-${author}-${suffix}.md`, md);
  });
};

View File

@ -0,0 +1,18 @@
<?xml version='1.0'?>
<newsdefs>
<newsitem author="jbroman" date="2014-01-16">
<p>Elections for Winter 2014 have concluded. The following people were elected:</p>
<ul>
<li>President: Bryan Coutts (<tt>b2coutts</tt>)</li>
<li>Vice-president: Visha Vijayanand (<tt>vvijayan</tt>)</li>
<li>Treasurer: Marc Burns (<tt>m4burns</tt>)</li>
<li>Secretary: Mark Farrell (<tt>m4farrel</tt>)</li>
</ul>
<p>The following people were appointed:</p>
<ul>
<li>Sysadmin: Murphy Berzish (<tt>mtrberzi</tt>)</li>
<li>Office Manager: Nicholas Black (<tt>nablack</tt>)</li>
</ul>
</newsitem>
</newsdefs>

2569
scripts/mdx-scripts/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
{
"dependencies": {
"jsdom": "^16.6.0",
"libxmljs": "^0.19.7",
"node-fetch": "^2.6.1",
"showdown": "^1.9.1"
}
}

View File

@ -0,0 +1,147 @@
const fs = require("fs");
const jsdom = require("jsdom");
const fetch = require("node-fetch");
const { JSDOM } = jsdom;
const { window } = new JSDOM("");
global.window = window;
var showdown = require("showdown");
const converter = new showdown.Converter();
const libxmljs = require("libxmljs");
const MIRROR_URL = "http://mirror.csclub.uwaterloo.ca/csclub/";
// Make sure the output directory exists. An already-existing directory is
// fine; any other failure is rethrown.
try {
  fs.mkdirSync("./markdown-talks/");
} catch (mkdirError) {
  const alreadyExists = mkdirError.code === "EEXIST";
  if (!alreadyExists) {
    throw mkdirError;
  }
}

// Read the media index, strip its DOCTYPE declaration, parse it and collect
// every <mediaitem> element.
const rawIndex = fs.readFileSync("../../media/index.xml", "utf8");
const file = rawIndex.replace(/<!DOCTYPE.*>/, "");
const xml = libxmljs.parseHtml(file);
const talks = xml.find("//mediaitem");
// Convert all talks concurrently and write one Markdown file per talk, then
// log how many were processed.
(async () => {
  const all = await Promise.all(
    talks.map(async (talk, idx) => {
      const { filename, markdown } = await xml2md(talk, idx);
      await fs.promises.writeFile(`./markdown-talks/${filename}.md`, markdown);
    }),
  );
  console.log(all.length);
  // Exit explicitly once everything has been written.
  // NOTE(review): presumably needed so that leftover timers or requests from
  // the thumbnail checks do not keep the process alive - confirm.
  process.exit(0);
})().catch((err) => {
  // A failed conversion or write must not go unnoticed: report it and signal
  // failure through the exit code.
  console.error(err);
  process.exit(1);
});
/**
 * Builds the Markdown document (YAML front matter followed by the abstract)
 * and the output file name for a single talk element.
 *
 * The template literals in this function are whitespace-sensitive because
 * their content becomes the front matter, so they are left exactly as written.
 *
 * @param {libxmljs.Element} talk - The talk element to convert.
 * @param {number} index - Value written to the `index` front-matter field.
 * @returns {Promise<{filename: string, markdown: string}>} The file name
 *   (without extension) derived from the title, and the Markdown text.
 */
async function xml2md(talk, index) {
const title = talk.attr("title").value();
// The abstract is optional: when there is no "abstract" child, the optional
// chain short-circuits and `abstract` is undefined.
const abstract = talk
.get("abstract")
?.childNodes()
.reduce((str, node) => str + node.toString(), "")
.trim();
// The child-node array is stringified (Array#toString joins with commas),
// then split on commas and trimmed to get one entry per presenter.
const presentors = talk
.get("presentor")
.childNodes()
.toString()
.split(",")
.map((s) => s.trim());
const thumbSmall = MIRROR_URL + talk.get("thumbnail").attr("file").value();
// The large thumbnail URL is derived from the small one and is dropped when
// resourceExists reports that it is not available.
/** @type{string | undefined} */
let thumbLarge = thumbSmall.replace("-thumb-small", "-thumb-large");
if (!(await resourceExists(thumbLarge))) {
thumbLarge = undefined;
}
// One front-matter list entry per "mediafile" child; `size` is optional.
const links = talk
.find("mediafile")
.map((mf) => ({
/** @type{string} */
file: MIRROR_URL + mf.attr("file").value(),
/** @type{string} */
type: mf.attr("type").value(),
/** @type{string | undefined} */
size: mf.attr("size")?.value() ?? undefined,
}))
.map(
({ file, type, size }) =>
" " +
`
- file: '${file.replaceAll("'", "''")}'
type: '${type.replaceAll("'", "''")}'
${size == null ? "" : `size: '${size.replaceAll("'", "''")}'`}
`.trim(),
);
// NOTE(review): presenter entries are emitted without surrounding quotes but
// still have their single quotes doubled; a name containing an apostrophe
// would presumably keep the doubled quote in the parsed value - confirm.
const markdown =
`
---
index: ${index}
title: '${title.replaceAll("'", "''")}'
presentors:
- ${presentors.join("\n - ").replaceAll("'", "''")}
thumbnails:
small: '${thumbSmall}'${thumbLarge ? `\n large: '${thumbLarge}'` : ""}
links:
${links.join("\n")}
---
${converter.makeMarkdown(abstract ?? "")}
`.trim() + "\n";
// File name: lower-cased title with spaces, commas and apostrophes turned
// into hyphens, colons and equals signs removed, "+" spelled out as
// "-plus-", and runs of hyphens collapsed to a single hyphen.
return {
filename: title
.toLowerCase()
.replaceAll(" ", "-")
.replaceAll(":", "")
.replaceAll("=", "")
.replaceAll(",", "-")
.replaceAll("'", "-")
.replaceAll("+", "-plus-")
.replaceAll(/-+/g, "-"),
markdown,
};
}
/**
 * Checks whether a URL is reachable by requesting it and inspecting the
 * response status.
 *
 * Each attempt first waits a random delay of up to ten seconds. If the request
 * has not completed within that delay plus three seconds, it is aborted and
 * the check is retried; the retry's result is what gets returned.
 *
 * @param {string} url - The URL to check.
 * @returns {Promise<boolean>} true for a 2xx or 3xx response, false otherwise.
 * @throws Any request error other than the timeout abort.
 */
async function resourceExists(url) {
  // NOTE(review): the random delay presumably spreads out the requests that
  // are started concurrently for all talks - confirm.
  const time = Math.trunc(Math.random() * 10000);
  await sleep(time);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), time + 3000);
  try {
    const response = await fetch(url, { signal: controller.signal });
    return response.status >= 200 && response.status < 400;
  } catch (err) {
    // Only a timeout abort triggers a retry; real failures propagate.
    if (err.name !== "AbortError") {
      throw err;
    }
    console.log("retrying", time, url);
    return resourceExists(url);
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(timer);
  }
}
/**
 * Returns a promise that resolves (with no value) after the given delay.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}