ThisTest/translators/Library Catalog (Aleph).js

271 lines
9.6 KiB
JavaScript
Raw Normal View History

2022-03-23 12:58:01 +01:00
{
"translatorID": "cf87eca8-041d-b954-795a-2d86348999d5",
"label": "Library Catalog (Aleph)",
"creator": "Simon Kornblith, Michael Berkowitz, Ming Yeung Cheung",
"target": "^https?://[^/]+/F(/?[A-Z0-9\\-]*(\\?.*)?$|\\?func=find|\\?func=scan|\\?func=short|\\?local_base=)",
"minVersion": "1.0.0b3.r1",
"maxVersion": "",
"priority": 250,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsb",
"lastUpdated": "2016-12-17 23:52:01"
}
/*
Aleph OPAC Translator
Example installations (mainly French):
http://naude.bibliotheque-mazarine.fr/
http://bibli.polytechnique.fr/
http://sifrix2.sdv.fr/
http://aleph.insa-rouen.fr
http://brenet.ens-lyon.fr
http://bu-pau.univ-pau.fr/
http://babel.bu.univ-paris5.fr
http://inti.univ-paris4.fr/
http://servaleph.univ-catholyon.fr/
http://armada.scd.univ-paris12.fr/
http://catalogue.univ-angers.fr/
http://biblio.ville-lehavre.fr/
http://opac.nebis.ch/
http://scd2.univ-lille1.fr/
http://catalogue.univ-paris1.fr/
http://source.ulg.ac.be/
http://med.cite-sciences.fr/
http://biblio.mulhouse.fr/
http://mediatheque.sigdci76.fr/
http://opac.biu-montpellier.fr/
National Diet Library (Japan):
https://ndlopac.ndl.go.jp
Germany:
http://aleph-www.ub.fu-berlin.de
http://opac.hu-berlin.de
http://alephdai.ub.hu-berlin.de
https://aleph.mpg.de
Mexico:
iibiblio.unam.mx
Poland:
https://aleph.bg.pwr.wroc.pl/F
*/
/**
 * Decide whether the current page is a single record or a list of records.
 *
 * The page URL is tested first; if it is not itself a record URL, every
 * anchor on the page is checked for an href that points to a record.
 *
 * @param {Document} doc - page being inspected; only `doc.location.href` and
 *     `doc.getElementsByTagName("a")` are used.
 * @param {String} url - page URL (unused; the URL is read from `doc`).
 * @return {String|undefined} "book" for a single-record page, "multiple" when
 *     at least one link to a record is found, otherwise undefined.
 */
function detectWeb(doc, url) {
	// A regex literal is required here: inside a string passed to
	// `new RegExp`, "\?" collapses to "?", which turns the intended literal
	// question mark into a lazy quantifier and lets URLs without any query
	// string match.
	var singleRe = /^https?:\/\/[^\/]+\/F\/?[A-Z0-9\-]*\?.*(?:func=full-set-set|func=direct|func=myshelf-full.*)/;

	if (singleRe.test(doc.location.href)) {
		return "book";
	}

	var tags = doc.getElementsByTagName("a");
	for (var i = 0; i < tags.length; i++) {
		if (singleRe.test(tags[i].href)) {
			return "multiple";
		}
	}
}
/**
 * Load the import translator used to parse the raw record view of a catalog.
 *
 * @param {String} uri - URL of the page the translator was started on.
 * @return {Object} import translator with the translator ID already set.
 */
function alephLoadImportTranslator(uri) {
	// Installations matched here get a different import translator
	// (presumably MAB2, judging by the original variable name - confirm);
	// everything else uses the default one. Written as a regex literal so the
	// escaped dots and question mark keep their literal meaning.
	var mab2Opac = /^https?:\/\/(?!alephdai)[^\/]+berlin|193\.30\.112\.134|duisburg-essen\/F\/[A-Z0-9\-]+\?.*|^https?:\/\/katalog\.ub\.uni-duesseldorf\.de\/F\/|^https?:\/\/aleph\.mpg\.de\/F\//;
	var translator = Zotero.loadTranslator("import");
	translator.setTranslator(mab2Opac.test(uri)
		? "91acf493-0de7-4473-8b62-89fd141e6c74"
		: "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	return translator;
}

/**
 * Fetch each raw-record page and hand it to scrape().
 *
 * @param {String[]} recordUris - URLs of the raw (format=001) record views.
 * @param {String} pageUri - URL used to choose the import translator.
 * @param {String} url - original page URL, forwarded to scrape().
 */
function alephFetchRecords(recordUris, pageUri, url) {
	var translator = alephLoadImportTranslator(pageUri);
	translator.getTranslatorObject(function (marc) {
		Zotero.Utilities.processDocuments(recordUris, function (newDoc) {
			scrape(newDoc, marc, url);
		});
	});
}

/**
 * Save the record shown on the page, or let the user pick from a result list.
 *
 * Single-record pages are re-requested in the raw "format=001" view; on list
 * pages the record links are collected, offered through Zotero.selectItems,
 * and the chosen ones are fetched in the same raw view.
 *
 * @param {Document} doc - page the translator was started on.
 * @param {String} url - URL of that page.
 */
function doWeb(doc, url) {
	// Regex literal: in a string passed to `new RegExp`, "\?" loses its
	// backslash and stops meaning a literal question mark.
	var detailRe = /^https?:\/\/[^\/]+\/F\/?[A-Z0-9\-]*\?.*(?:func=full-set-set|func=direct|func=myshelf-full|func=myself_full.*)/;
	var uri = doc.location.href;

	if (detailRe.test(uri)) {
		var newuri = null;

		// The 'add to basket' link carries the document number; swapping its
		// function for 'direct' yields a stable URL for this record.
		var basketLink = doc.evaluate('//*[contains(@href, "myshelf-add-ful-1")]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		if (basketLink) {
			var adduri = basketLink.attributes.getNamedItem("href").value;
			adduri = adduri.replace("myshelf-add-ful-1", "direct");
			var baseuri = uri.match(/.*\?/);
			var funcuri = adduri.match(/\?.*/);
			if (baseuri && funcuri) {
				// Keep the page's own base (host, path, session) and append the
				// basket link's query string without its leading "?".
				newuri = baseuri[0] + funcuri[0].replace(/^\?+/, "") + "&format=001";
				Zotero.debug('directuri = ' + newuri);
			}
		}

		if (newuri === null) {
			newuri = uri.replace(/\&format=[0-9]{3}/, "&format=001");
			if (newuri == uri) newuri += "&format=001";
		}

		alephFetchRecords([newuri], uri, url);
		return;
	}

	// Passed as a string (as before) so getItemArray compiles it itself; the
	// backslashes are doubled so that "\?" reaches the regex engine intact.
	var itemRegexp = '^https?://[^/]+/F/?[A-Z0-9\\-]*\\?.*(?:func=full-set-set.*&format=999|func=direct|func=myshelf-full.*)';
	var items = Zotero.Utilities.getItemArray(doc, doc, itemRegexp, '^[0-9]+$');

	var haveItems = false;
	for (var key in items) {
		haveItems = true;
		break;
	}

	// If only purely numeric links exist, accept them and try to replace the
	// number with the text of the whole table row.
	if (!haveItems) {
		items = Zotero.Utilities.getItemArray(doc, doc, itemRegexp);
		var newItems = {};
		for (var link in items) {
			var sessionPart = link.match(/[A-Z0-9]{20}[A-Z0-9]*-[0-9]+\?func.*$/);
			// Links without a session id cannot be located in the table; skip
			// them instead of failing on a null match.
			if (!sessionPart) continue;
			var text = ZU.xpathText(doc, '//a[contains(@href,"' + sessionPart[0] + '")]/ancestor::tr[1]');
			if (text) {
				newItems[link] = text;
				haveItems = true;
			}
		}
		if (haveItems) items = newItems;
	}

	Zotero.selectItems(items, function (selected) {
		if (!selected) {
			return true;
		}
		var newUris = [];
		for (var selectedUri in selected) {
			var newUri = selectedUri.replace("&format=999", "&format=001");
			if (newUri == selectedUri) {
				newUri += "&format=001";
			}
			newUris.push(newUri);
		}
		alephFetchRecords(newUris, uri, url);
	});
}
/**
 * Find which of the known table layouts the raw record page uses.
 *
 * Each entry has a `probe` XPath that must select at least one node and a
 * `rows` XPath selecting the table rows to read. `th` marks layouts whose
 * field label sits in a <th>; `ndl` marks the layout whose value is in the
 * third cell.
 *
 * @param {Document} newDoc - raw record page.
 * @return {Object|null} the first matching layout, or null if none matches.
 */
function alephFindRecordLayout(newDoc) {
	var layouts = [
		{
			probe: '//*[tr[td/text()="LDR"]]/tr[td[2]]',
			rows: '//*[tr[td/text()="LDR"]]/tr[td[2]]'
		},
		{
			// UCSB Pegasus
			probe: '//tbody[tr/td[@scope="row"]/strong[contains(text(), "LDR")]]',
			rows: '//tbody[tr/td[@scope="row"]/strong[contains(text(), "LDR")]]/tr'
		},
		{
			probe: '//*[tr[th[normalize-space(text())="LDR"]]]/tr[td[1]]',
			rows: '//*[tr[th[normalize-space(text())="LDR"]]]/tr[td[1]]',
			th: true
		},
		{
			probe: '//tr[2]//table[2]//tr',
			rows: '//tr[2]//table[2]//tr[td[2]]'
		},
		{
			probe: '//table//tr[td[2][@class="td1"]]',
			rows: '//table//tr[td[2][@class="td1"]]'
		},
		{
			// for NDL library
			probe: '//table/tbody/tr[td/span/b]',
			rows: '//table/tbody/tr[td/span/b]',
			ndl: true
		},
		{
			probe: '//tr/td[2]/table/tbody[tr/td[contains(text(), "LDR")]]',
			rows: '//tr/td[2]/table/tbody[tr/td[contains(text(), "LDR")]]/tr'
		}
	];
	for (var i = 0; i < layouts.length; i++) {
		if (newDoc.evaluate(layouts[i].probe, newDoc, null, XPathResult.ANY_TYPE, null).iterateNext()) {
			return layouts[i];
		}
	}
	return null;
}

/**
 * Text content of the first node selected by `cellXPath` relative to `row`.
 *
 * @return {String|null} the text, or null when the row has no such cell.
 */
function alephCellText(newDoc, row, cellXPath) {
	var cell = newDoc.evaluate(cellXPath, row, null, XPathResult.ANY_TYPE, null).iterateNext();
	return cell ? cell.textContent : null;
}

/**
 * Clean up one creator produced by the import translator.
 *
 * @param {Object} creator - creator object with lastName / firstName.
 * @return {Object|null} the cleaned creator, or null if it should be dropped.
 */
function alephNormalizeCreator(creator) {
	if (!creator || !creator.lastName) return null;

	if (!creator.firstName) {
		// The whole name ended up in lastName: the first token becomes the
		// last name, the remainder the first name.
		var parts = /([^\s]+)\s+(.*)$/.exec(creator.lastName);
		if (parts) {
			creator = {lastName: parts[1], firstName: parts[2], creatorType: 'author'};
		}
		// A single-token name is kept as is; it used to be turned into a
		// creator without a last name and then silently discarded.
	}

	// Names containing digits are dropped.
	if (/\d/.test(creator.lastName)) return null;

	// Cut the first name at the first "|" (a leftover subfield marker).
	if (creator.firstName && creator.firstName.indexOf("|") !== -1) {
		creator.firstName = creator.firstName.split("|")[0].trim();
	}
	return creator;
}

/**
 * Read the raw record table from `newDoc`, feed it to the import translator's
 * record object and complete the resulting Zotero item.
 *
 * @param {Document} newDoc - raw ("format=001") record page.
 * @param {Object} marc - import translator object; `marc.record` and
 *     `marc.subfieldDelimiter` are used.
 * @param {String} url - URL of the page the translator was started on; its
 *     host names the repository.
 * @throws {Error} when none of the known table layouts is found.
 */
function scrape(newDoc, marc, url) {
	var uri = newDoc.location.href;

	var layout = alephFindRecordLayout(newDoc);
	if (!layout) {
		// Fail with a clear message rather than evaluating an undefined XPath
		// and failing later for an unrelated-looking reason.
		throw new Error("Aleph: no MARC record table found at " + uri);
	}

	var fieldPath = layout.th ? './th' : './td[1]';
	var valuePath = layout.th ? './TD[1]' : (layout.ndl ? './TD[3]' : './TD[2]');

	var rows = newDoc.evaluate(layout.rows, newDoc, null, XPathResult.ANY_TYPE, null);
	var record = new marc.record();
	var row;
	while ((row = rows.iterateNext())) {
		var rawField = alephCellText(newDoc, row, fieldPath);
		var field = rawField === null ? "" : Zotero.Utilities.superCleanString(rawField);
		if (!field) continue;

		var value = alephCellText(newDoc, row, valuePath);
		if (value === null) continue;

		// When the cell spans several lines, the second line holds the value.
		var secondLine = value.split(/\n/)[1];
		if (secondLine) value = Zotero.Utilities.trimInternal(secondLine);
		Zotero.debug(field + " : " + value);

		if (field == "LDR") {
			record.leader = value;
		} else if (field != "FMT") {
			// "|a " style markers become real subfield delimiters.
			value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter + "$1");
			// The label is the 3-character tag optionally followed by up to
			// two indicator characters.
			record.addField(field.substring(0, 3), field.substring(3, 5), value);
		}
	}

	var newItem = new Zotero.Item();
	record.translate(newItem);

	var domain = /https?:\/\/([^\/]+)/.exec(url || uri);
	if (domain) {
		newItem.repository = domain[1] + " Library Catalog";
	}

	var oldCreators = newItem.creators || [];
	var creators = [];
	for (var i = 0; i < oldCreators.length; i++) {
		var creator = alephNormalizeCreator(oldCreators[i]);
		if (creator) creators.push(creator);
	}
	newItem.creators = creators;

	if (newItem.title) {
		newItem.title = newItem.title.replace(/(<<|>>)/g, '');
	}
	newItem.complete();
}
/** BEGIN TEST CASES **/
var testCases = []
/** END TEST CASES **/