333 lines
10 KiB
JavaScript
333 lines
10 KiB
JavaScript
|
{
|
||
|
"translatorID": "b56d756e-814e-4b46-bc58-d61dccc9f32f",
|
||
|
"label": "Nagoya University OPAC",
|
||
|
"creator": "Frank Bennett",
|
||
|
"target": "^https?://opac\\.nul\\.nagoya-u\\.ac\\.jp/webopac/(catdbl\\.do|ctlsrh\\.do)",
|
||
|
"minVersion": "2.0b7",
|
||
|
"maxVersion": "",
|
||
|
"priority": 100,
|
||
|
"inRepository": true,
|
||
|
"translatorType": 4,
|
||
|
"browserSupport": "gcsibv",
|
||
|
"lastUpdated": "2012-07-13 07:33:49"
|
||
|
}
|
||
|
|
||
|
// #######################
|
||
|
// ##### Sample URLs #####
|
||
|
// #######################
|
||
|
|
||
|
/*
|
||
|
* The site is session-based, with page content negotiated
|
||
|
* in POST calls. The starting point for an OPAC search is
|
||
|
* the URL below. In testing, I tried the following:
|
||
|
*
|
||
|
* - A search listing of books
|
||
|
* - A search listing of journals (no icon)
|
||
|
* - A mixed search listing of books and journals
|
||
|
* - A journal page (no icon)
|
||
|
* - A book page
|
||
|
*/
|
||
|
// http://opac.nul.nagoya-u.ac.jp/webopac/catsrk.do
|
||
|
|
||
|
|
||
|
|
||
|
// #####################
|
||
|
// ##### Constants #####
|
||
|
// #####################
|
||
|
|
||
|
/*
|
||
|
* Strings corresponding to variables
|
||
|
*/
|
||
|
var pageStrings = {
|
||
|
title: ['タイトル / 著者','Title / Author'],
|
||
|
year: ['出版・頒布','Publication'],
|
||
|
isbn: ['ISBN','ISBN'],
|
||
|
authors: ['著者名リンク','Author link'],
|
||
|
series: ['シリーズ情報','Series information']
|
||
|
};
|
||
|
|
||
|
var itemUrlBase = "http://opac.nul.nagoya-u.ac.jp/webopac/catdbl.do";
|
||
|
|
||
|
// ############################
|
||
|
// ##### String functions #####
|
||
|
// ############################
|
||
|
|
||
|
/*
|
||
|
* Chop a semicolon-delimited string of authors out of a raw title string,
|
||
|
* check it for Japanese characters, and save the raw string for each author
|
||
|
* to an array. If no Japanese authors were found, save directly to the item
|
||
|
* object.
|
||
|
*/
|
||
|
var parseRomanAuthors = function (item,data) {
|
||
|
var datastring = data['title'][0];
|
||
|
// don't bother if there is no author info
|
||
|
if ( ! datastring.match(/.*\/.*/) ) {
|
||
|
return true;
|
||
|
}
|
||
|
// cut off the title
|
||
|
datastring = datastring.replace(/.*\//, "");
|
||
|
// raise flag if there are japanese characters
|
||
|
var japanese_check = datastring.match(/.*[^- &0-9()\[\];:,.a-zA-Z].*/);
|
||
|
// replace comma with semicolon in certain cases, to prepare for split
|
||
|
datastring = datastring.replace(/,(\s+[a-zA-Z]{3,})/, ";$1");
|
||
|
datastring = datastring.replace(/,(\s+[a-zA-Z]{1}[^a-zA-Z])/, ";$1");
|
||
|
datastring = datastring.replace(/(\s+and\s+)/, "; ");
|
||
|
datastring = datastring.replace(/(\s+&\s+)/, "; ");
|
||
|
// split the authors
|
||
|
var authors = datastring.replace(/\|.*/, "").split(";");
|
||
|
// this is parsing the authors for a single work. if there is a special byline, we
|
||
|
// assume that it applies to all subsequent entries until overridden.
|
||
|
var authortype = 'author';
|
||
|
for (i in authors) {
|
||
|
item.authorstrings.push(authors[i]);
|
||
|
var authortypehint = authors[i].replace(/^([ ,.:a-z]*).*/, "$1");
|
||
|
if ( authortypehint.match(/.*(edit|organiz).*/) ) {
|
||
|
authortype = "editor";
|
||
|
} else if ( authortypehint.match(/.*trans.*/) ) {
|
||
|
authortype = "translator";
|
||
|
}
|
||
|
var author = authors[i].replace(/^[ a-z]*/, "").replace( /\.\.\..*/, "" );
|
||
|
// need to test for length because the replacement of commas with semicolons
|
||
|
// can cause a short split at the end of a byline that originally ended in a comma
|
||
|
if ( ! japanese_check && author.length ) {
|
||
|
item.creators.push(Zotero.Utilities.cleanAuthor(author, authortype));
|
||
|
}
|
||
|
}
|
||
|
return japanese_check;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* For each author link, attempt to find a hint that the person
|
||
|
* is an editor or translator, first in the link text itself, then in
|
||
|
* the list of raw author strings captured by parseRomanAuthors.
|
||
|
* Clean out cruft, reverse the order of each name, and save
|
||
|
* directly to the item object.
|
||
|
*/
|
||
|
var parseJapaneseAuthors = function (item, data) {
|
||
|
var authortype = author;
|
||
|
var authors = data['authors'];
|
||
|
for (i in authors ) {
|
||
|
if ( authors[i].match(/.*編.*/) ) {
|
||
|
authortype = 'editor';
|
||
|
} else if ( authors[i].match(/.*訳.*/) ) {
|
||
|
authortype = 'translator';
|
||
|
} else {
|
||
|
authortype = 'author';
|
||
|
}
|
||
|
var author = authors[i].replace(/[*]/g,"").replace(/[0-9<()|].*/, "").replace(/(.*?),(.*)/, "$2 $1");
|
||
|
// If we claim to be an author, double-check in the English entries for a translator hint.
|
||
|
// This is an enormous pain, but the original records are a mess, with different conventions
|
||
|
// for Japanese and foreign records, sometimes mixed up in the same entry. What are you
|
||
|
// going to do.
|
||
|
for ( x in item.authorstrings ) {
|
||
|
var authorstring = item.authorstrings[x];
|
||
|
var name = author.split(" ");
|
||
|
name.reverse();
|
||
|
if ( authorstring.indexOf( name[0] ) > -1 && authorstring.match(/.*(訳|譯|譯註)$/) ) {
|
||
|
authortype = 'translator';
|
||
|
break;
|
||
|
} else if ( authorstring.indexOf( name[0] ) > -1 && authorstring.match(/.*(編|編著)$/) ) {
|
||
|
authortype = 'editor';
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
delete item.authorstrings;
|
||
|
item.creators.push(Zotero.Utilities.cleanAuthor(author, authortype));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Split extracted title field. This always starts as a single list item,
|
||
|
* but can contain entries for several works, as in an omnibus volume of
|
||
|
* translated works, for example. Such records separate the elements of
|
||
|
* the omnibus with periods that have no trailing space, so we use that as
|
||
|
* the split point. We discard the phonetic information appended to the end
|
||
|
* of the string in Japanese records.
|
||
|
*/
|
||
|
function splitTitle(data) {
|
||
|
// split in data array
|
||
|
var titlestring = data['title'][0].replace(/\|.*/, "");
|
||
|
data['title'] = titlestring.split(" . ");
|
||
|
}
|
||
|
|
||
|
// ##########################
|
||
|
// ##### Page functions #####
|
||
|
// ##########################
|
||
|
|
||
|
/*
|
||
|
* When getlist argument is nil, return a value when the target
|
||
|
* index DOM contains at least one book entry, otherwise
|
||
|
* return false.
|
||
|
*
|
||
|
* When getlist argument is true, return a list of
|
||
|
* array items for book entries in the DOM.
|
||
|
*/
|
||
|
var sniffIndexPage = function(doc,getlist){
|
||
|
var check = doc.evaluate("//td[div[@class='lst_value' and contains(text(),'Books')]]/following-sibling::td", doc, null, XPathResult.ANY_TYPE, null);
|
||
|
var node = check.iterateNext();
|
||
|
if (getlist){
|
||
|
var ret = new Object();
|
||
|
while (node){
|
||
|
var myitems = Zotero.Utilities.getItemArray(
|
||
|
doc,
|
||
|
node,
|
||
|
"document\\.catsrhform\\.pkey.value=");
|
||
|
for (var r in myitems){
|
||
|
ret[r] = myitems[r];
|
||
|
}
|
||
|
node = check.iterateNext();
|
||
|
}
|
||
|
return ret;
|
||
|
} else {
|
||
|
return node;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Invoke sniffIndexPage to generate a list of book
|
||
|
* items in the target DOM.
|
||
|
*/
|
||
|
var getBookItems = function(doc){
|
||
|
return sniffIndexPage(doc,true);
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Extract data from the DOM using the var-string pairs in
|
||
|
* pageStrings as a guide to navigation.
|
||
|
*/
|
||
|
var scrapePage = function(doc, spec) {
|
||
|
var namespace = doc.documentElement.namespaceURI;
|
||
|
var nsResolver = namespace ? function(prefix) {
|
||
|
if (prefix == 'x') return namespace; else return null;
|
||
|
} : null;
|
||
|
var data = new Object();
|
||
|
for (key in spec) {
|
||
|
var check = doc.evaluate("//th[div[contains(text(),'"+spec[key][0]+"') or contains(text(),'"+spec[key][1]+"')]]/following-sibling::td/div", doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
|
var c = check.iterateNext();
|
||
|
while (c) {
|
||
|
if (!data[key] ) {
|
||
|
data[key] = new Array();
|
||
|
}
|
||
|
data[key].push(Zotero.Utilities.trimInternal(c.textContent));
|
||
|
c = check.iterateNext();
|
||
|
}
|
||
|
}
|
||
|
return data;
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Bring it all together.
|
||
|
*/
|
||
|
function scrapeAndParse(doc,url) {
|
||
|
if (!detectWeb(doc,url)){
|
||
|
return false;
|
||
|
}
|
||
|
var item = new Zotero.Item("book");
|
||
|
item.authorstrings = new Array();
|
||
|
var data = scrapePage(doc, pageStrings);
|
||
|
splitTitle(data);
|
||
|
|
||
|
if (data['title']) {
|
||
|
var titles = new Array();
|
||
|
for (i in data['title']) {
|
||
|
titles.push( data['title'][i].replace(/\s+\/.*/, "") );
|
||
|
}
|
||
|
item.title = titles.join(", ");
|
||
|
var jse_authors = parseRomanAuthors( item, data );
|
||
|
if ( jse_authors ) {
|
||
|
parseJapaneseAuthors( item, data );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (data['year']) {
|
||
|
// sometimes there are multiple "date" fields, some of which are filled
|
||
|
// with other random information
|
||
|
for (i in data['year']) {
|
||
|
var year = data['year'][i];
|
||
|
if ( year.match(/.*[0-9]{3}.*/) ) {
|
||
|
item.date = year.replace(/.*?([0-9][.0-9][0-9]+).*/, "$1");
|
||
|
item.place = year.replace(/:.*/, "").replace(/[\[\]]/g, "");
|
||
|
item.publisher = year.replace(/.*:(.*),.*/, "$1");
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (data['series']) {
|
||
|
item.series = data['series'][0].replace(/[/|<].*/, "");
|
||
|
}
|
||
|
|
||
|
if (data['isbn']) {
|
||
|
item.ISBN = data['isbn'][0].replace(/[^0-9]*([0-9]+).*/, "$1");
|
||
|
}
|
||
|
item.complete();
|
||
|
}
|
||
|
|
||
|
// #########################
|
||
|
// ##### API functions #####
|
||
|
// #########################
|
||
|
|
||
|
function detectWeb(doc, url) {
|
||
|
if (url.match(/.*\/webopac\/catdbl.do/)) {
|
||
|
var journal_test = doc.evaluate( '//th[div[contains(text(),"Frequency of publication") or contains(text(),"刊行頻度") or contains(text(),"巻号") or contains(text(),"Volumes")]]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
|
if (!journal_test) {
|
||
|
return "book";
|
||
|
}
|
||
|
} else if (url.match(/.*\/webopac\/ctlsrh.do/)){
|
||
|
if (sniffIndexPage(doc)){
|
||
|
return "multiple";
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
function doWeb(doc, url) {
|
||
|
var format = detectWeb(doc, url);
|
||
|
if (format == "multiple") {
|
||
|
var items = {};
|
||
|
for (var u in Zotero.selectItems( getBookItems(doc) )){
|
||
|
var m = u.match(/.*document\.catsrhform\.pkey\.value=\'([^\']+)\'.*/);
|
||
|
items[itemUrlBase+"?pkey="+m[1]+"&initFlg=_RESULT_SET_NOTBIB"] = true;
|
||
|
}
|
||
|
var urls = [];
|
||
|
for (var u in items){
|
||
|
urls.push(u);
|
||
|
}
|
||
|
ZU.processDocuments(u, scrapeAndParse);
|
||
|
} else if (format == "book"){
|
||
|
scrapeAndParse(doc, url);
|
||
|
}
|
||
|
}
|
||
|
/** BEGIN TEST CASES **/
|
||
|
var testCases = [
|
||
|
{
|
||
|
"type": "web",
|
||
|
"url": "http://opac.nul.nagoya-u.ac.jp/webopac/catdbl.do?pkey=TY50091937&initFlg=_RESULT_SET_NOTBIB",
|
||
|
"items": [
|
||
|
{
|
||
|
"itemType": "book",
|
||
|
"creators": [
|
||
|
{
|
||
|
"firstName": "Jeremy",
|
||
|
"lastName": "Adelman",
|
||
|
"creatorType": "author"
|
||
|
}
|
||
|
],
|
||
|
"notes": [],
|
||
|
"tags": [],
|
||
|
"seeAlso": [],
|
||
|
"attachments": [],
|
||
|
"authorstrings": " Jeremy Adelman",
|
||
|
"title": "Frontier development : land, labour, and capital on the wheatlands of Argentina and Canada, 1890-1914",
|
||
|
"date": "1994",
|
||
|
"place": "Oxford",
|
||
|
"publisher": "Clarendon Press",
|
||
|
"series": "Oxford historical monographs",
|
||
|
"ISBN": "0198204418",
|
||
|
"libraryCatalog": "Nagoya University OPAC",
|
||
|
"shortTitle": "Frontier development"
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
/** END TEST CASES **/
|