{
	"translatorID": "b56d756e-814e-4b46-bc58-d61dccc9f32f",
	"label": "Nagoya University OPAC",
	"creator": "Frank Bennett",
	"target": "^https?://opac\\.nul\\.nagoya-u\\.ac\\.jp/webopac/(catdbl\\.do|ctlsrh\\.do)",
	"minVersion": "2.0b7",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2012-07-13 07:33:49"
}

// #######################
// ##### Sample URLs #####
// #######################

/*
 * The site is session-based, with page content negotiated
 * in POST calls. The starting point for an OPAC search is
 * the URL below. In testing, I tried the following:
 *
 * - A search listing of books
 * - A search listing of journals (no icon)
 * - A mixed search listing of books and journals
 * - A journal page (no icon)
 * - A book page
 */
// http://opac.nul.nagoya-u.ac.jp/webopac/catsrk.do

// #####################
// ##### Constants #####
// #####################

/*
 * Strings corresponding to variables
 */
// Map from item-field key to the [Japanese, English] table-header labels
// used on OPAC detail pages; scrapePage matches either label in its XPath.
var pageStrings = {
	title: ['タイトル / 著者','Title / Author'],
	year: ['出版・頒布','Publication'],
	isbn: ['ISBN','ISBN'],
	authors: ['著者名リンク','Author link'],
	series: ['シリーズ情報','Series information']
};

// Base URL of a single-record page; doWeb appends a "?pkey=..." query.
var itemUrlBase = "http://opac.nul.nagoya-u.ac.jp/webopac/catdbl.do";
// ############################
// ##### String functions #####
// ############################

/*
 * Chop a semicolon-delimited string of authors out of a raw title string,
 * check it for Japanese characters, and save the raw string for each author
 * to an array. If no Japanese authors were found, save directly to the item
 * object.
 *
 * Returns a truthy value when the byline needs Japanese-side handling
 * (no "/" byline at all, or Japanese characters present); falsy otherwise.
 */
var parseRomanAuthors = function (item, data) {
	var datastring = data['title'][0];
	// don't bother if there is no author info
	if (!datastring.match(/.*\/.*/)) {
		return true;
	}
	// cut off the title
	datastring = datastring.replace(/.*\//, "");
	// raise flag if there are japanese characters
	var japanese_check = datastring.match(/.*[^- &0-9()\[\];:,.a-zA-Z].*/);
	// replace comma with semicolon in certain cases, to prepare for split
	datastring = datastring.replace(/,(\s+[a-zA-Z]{3,})/, ";$1");
	datastring = datastring.replace(/,(\s+[a-zA-Z]{1}[^a-zA-Z])/, ";$1");
	datastring = datastring.replace(/(\s+and\s+)/, "; ");
	datastring = datastring.replace(/(\s+&\s+)/, "; ");
	// split the authors
	var authors = datastring.replace(/\|.*/, "").split(";");
	// this is parsing the authors for a single work. if there is a special byline, we
	// assume that it applies to all subsequent entries until overridden.
	var authortype = 'author';
	// indexed loop with a declared counter: the original for...in leaked an
	// implicit global "i" and iterated array keys as strings
	for (var i = 0; i < authors.length; i++) {
		item.authorstrings.push(authors[i]);
		// leading lowercase words ("edited by", "translated by", ...) hint at the role
		var authortypehint = authors[i].replace(/^([ ,.:a-z]*).*/, "$1");
		if (authortypehint.match(/.*(edit|organiz).*/)) {
			authortype = "editor";
		} else if (authortypehint.match(/.*trans.*/)) {
			authortype = "translator";
		}
		// strip the byline words and any trailing "..." continuation
		var author = authors[i].replace(/^[ a-z]*/, "").replace(/\.\.\..*/, "");
		// need to test for length because the replacement of commas with semicolons
		// can cause a short split at the end of a byline that originally ended in a comma
		if (!japanese_check && author.length) {
			item.creators.push(Zotero.Utilities.cleanAuthor(author, authortype));
		}
	}
	return japanese_check;
};
/*
 * For each author link, attempt to find a hint that the person
 * is an editor or translator, first in the link text itself, then in
 * the list of raw author strings captured by parseRomanAuthors.
 * Clean out cruft, reverse the order of each name, and save
 * directly to the item object.
 */
var parseJapaneseAuthors = function (item, data) {
	// Bug fix: this used to read "var authortype = author;", which referenced
	// the hoisted (still-undefined) "author" variable declared below. Latent
	// only because the loop always reassigns authortype before use.
	var authortype = 'author';
	// Guard: scrapePage omits the key entirely when no author links were
	// found; the original for...in over undefined silently did nothing.
	var authors = data['authors'] || [];
	for (var i = 0; i < authors.length; i++) {
		// 編 = editor, 訳 = translator
		if (authors[i].match(/.*編.*/)) {
			authortype = 'editor';
		} else if (authors[i].match(/.*訳.*/)) {
			authortype = 'translator';
		} else {
			authortype = 'author';
		}
		// strip markers/IDs, then swap "family, given" to "given family"
		var author = authors[i].replace(/[*]/g, "").replace(/[0-9<()|].*/, "").replace(/(.*?),(.*)/, "$2 $1");
		// If we claim to be an author, double-check in the English entries for a translator hint.
		// This is an enormous pain, but the original records are a mess, with different conventions
		// for Japanese and foreign records, sometimes mixed up in the same entry. What are you
		// going to do.
		// NOTE(review): item.authorstrings is deleted at the end of the first
		// pass, so only the first author gets this cross-check; the "|| []"
		// preserves that original behavior for subsequent iterations.
		var rawstrings = item.authorstrings || [];
		for (var x = 0; x < rawstrings.length; x++) {
			var authorstring = rawstrings[x];
			var name = author.split(" ");
			name.reverse();
			if (authorstring.indexOf(name[0]) > -1 && authorstring.match(/.*(訳|譯|譯註)$/)) {
				authortype = 'translator';
				break;
			} else if (authorstring.indexOf(name[0]) > -1 && authorstring.match(/.*(編|編著)$/)) {
				authortype = 'editor';
				break;
			}
		}
		delete item.authorstrings;
		item.creators.push(Zotero.Utilities.cleanAuthor(author, authortype));
	}
};
/*
 * Split the extracted title field. The scraped value arrives as a single
 * list item, but omnibus records (e.g. a volume of collected translations)
 * can pack several works into it, separated by a period with surrounding
 * spaces (" . "). Phonetic information appended after a "|" in Japanese
 * records is discarded before splitting.
 */
function splitTitle(data) {
	var withoutPhonetics = data['title'][0].replace(/\|.*/, "");
	data['title'] = withoutPhonetics.split(" . ");
}
// ##########################
// ##### Page functions #####
// ##########################

/*
 * With getlist unset, return a truthy node when the target index DOM
 * contains at least one book entry, otherwise a falsy value.
 *
 * With getlist set to true, return an object mapping item link URLs
 * to titles for every book entry in the DOM.
 */
var sniffIndexPage = function (doc, getlist) {
	// Result cells whose sibling label cell says "Books".
	var bookCells = doc.evaluate(
		"//td[div[@class='lst_value' and contains(text(),'Books')]]/following-sibling::td",
		doc, null, XPathResult.ANY_TYPE, null);
	var cell = bookCells.iterateNext();
	if (!getlist) {
		// Sniff mode: the first matching node (or null) is the answer.
		return cell;
	}
	var found = {};
	while (cell) {
		var entries = Zotero.Utilities.getItemArray(
			doc,
			cell,
			"document\\.catsrhform\\.pkey.value=");
		for (var key in entries) {
			found[key] = entries[key];
		}
		cell = bookCells.iterateNext();
	}
	return found;
};
/*
 * Convenience wrapper: run sniffIndexPage in list mode to collect the
 * book entries present in the target DOM.
 */
var getBookItems = function (doc) {
	return sniffIndexPage(doc, true);
};
/*
 * Extract data from the DOM using the var-string pairs in
 * pageStrings as a guide to navigation.
 *
 * Returns an object mapping each key of spec to an array of trimmed
 * text contents found for that field; keys with no matches are absent.
 */
var scrapePage = function (doc, spec) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;
	var data = {};
	// "var key" added: the original let "key" leak as an implicit global.
	for (var key in spec) {
		// Match a table header containing either the Japanese or the English
		// label, and collect the text of the value cell(s) that follow it.
		var check = doc.evaluate("//th[div[contains(text(),'" + spec[key][0] + "') or contains(text(),'" + spec[key][1] + "')]]/following-sibling::td/div", doc, nsResolver, XPathResult.ANY_TYPE, null);
		var c = check.iterateNext();
		while (c) {
			if (!data[key]) {
				data[key] = [];
			}
			data[key].push(Zotero.Utilities.trimInternal(c.textContent));
			c = check.iterateNext();
		}
	}
	return data;
};
/*
 * Bring it all together: scrape a record page and save a Zotero item.
 */
function scrapeAndParse(doc, url) {
	if (!detectWeb(doc, url)) {
		return false;
	}
	var item = new Zotero.Item("book");
	// Scratch list of raw byline strings, shared with the author parsers
	// (parseJapaneseAuthors deletes it again before the item completes).
	item.authorstrings = [];
	var data = scrapePage(doc, pageStrings);
	splitTitle(data);

	if (data['title']) {
		var titles = [];
		// indexed loops throughout: the originals used for...in without
		// "var", leaking implicit globals and iterating keys as strings
		for (var i = 0; i < data['title'].length; i++) {
			// keep only the title proper; the byline follows " /"
			titles.push(data['title'][i].replace(/\s+\/.*/, ""));
		}
		item.title = titles.join(", ");
		// truthy return means the byline needs Japanese-side handling
		var jse_authors = parseRomanAuthors(item, data);
		if (jse_authors) {
			parseJapaneseAuthors(item, data);
		}
	}

	if (data['year']) {
		// sometimes there are multiple "date" fields, some of which are filled
		// with other random information
		for (var j = 0; j < data['year'].length; j++) {
			var year = data['year'][j];
			if (year.match(/.*[0-9]{3}.*/)) {
				item.date = year.replace(/.*?([0-9][.0-9][0-9]+).*/, "$1");
				item.place = year.replace(/:.*/, "").replace(/[\[\]]/g, "");
				item.publisher = year.replace(/.*:(.*),.*/, "$1");
				break;
			}
		}
	}

	if (data['series']) {
		// series title runs up to the first "/", "|", or "<"
		item.series = data['series'][0].replace(/[/|<].*/, "");
	}

	if (data['isbn']) {
		// first run of digits only
		item.ISBN = data['isbn'][0].replace(/[^0-9]*([0-9]+).*/, "$1");
	}
	item.complete();
}
// #########################
// ##### API functions #####
// #########################

/*
 * Zotero detection hook: "book" for a record page without journal
 * frequency/volume headers, "multiple" for a search-result page that
 * contains book entries, false otherwise.
 */
function detectWeb(doc, url) {
	// "\." fixes the unescaped dots in the original patterns, which would
	// also have matched e.g. ".../catdblXdo".
	if (url.match(/.*\/webopac\/catdbl\.do/)) {
		// Journal records carry frequency/volume headers; skip them (no icon).
		var journal_test = doc.evaluate('//th[div[contains(text(),"Frequency of publication") or contains(text(),"刊行頻度") or contains(text(),"巻号") or contains(text(),"Volumes")]]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		if (!journal_test) {
			return "book";
		}
	} else if (url.match(/.*\/webopac\/ctlsrh\.do/)) {
		if (sniffIndexPage(doc)) {
			return "multiple";
		}
	}
	return false;
}
/*
 * Zotero entry point. On a search-result page, let the user select
 * entries and fetch each corresponding record page; on a record page,
 * scrape it directly.
 */
function doWeb(doc, url) {
	var format = detectWeb(doc, url);
	if (format == "multiple") {
		var items = {};
		for (var u in Zotero.selectItems(getBookItems(doc))) {
			// The link href stores the record key in an inline JS assignment
			// ("document.catsrhform.pkey.value='...'"); pull it out and build
			// a direct record URL from it.
			var m = u.match(/.*document\.catsrhform\.pkey\.value=\'([^\']+)\'.*/);
			items[itemUrlBase + "?pkey=" + m[1] + "&initFlg=_RESULT_SET_NOTBIB"] = true;
		}
		var urls = [];
		for (var itemUrl in items) {
			urls.push(itemUrl);
		}
		// Bug fix: the original passed the leftover loop variable "u" (a
		// single key string) instead of the collected "urls" array, which
		// was built but never used.
		ZU.processDocuments(urls, scrapeAndParse);
	} else if (format == "book") {
		scrapeAndParse(doc, url);
	}
}
/** BEGIN TEST CASES **/
// Recorded Zotero translator test: one book record page and the item it is
// expected to produce ("authorstrings" appears to be the serialized scratch
// field pushed by parseRomanAuthors — left as recorded).
var testCases = [
	{
		"type": "web",
		"url": "http://opac.nul.nagoya-u.ac.jp/webopac/catdbl.do?pkey=TY50091937&initFlg=_RESULT_SET_NOTBIB",
		"items": [
			{
				"itemType": "book",
				"creators": [
					{
						"firstName": "Jeremy",
						"lastName": "Adelman",
						"creatorType": "author"
					}
				],
				"notes": [],
				"tags": [],
				"seeAlso": [],
				"attachments": [],
				"authorstrings": " Jeremy Adelman",
				"title": "Frontier development : land, labour, and capital on the wheatlands of Argentina and Canada, 1890-1914",
				"date": "1994",
				"place": "Oxford",
				"publisher": "Clarendon Press",
				"series": "Oxford historical monographs",
				"ISBN": "0198204418",
				"libraryCatalog": "Nagoya University OPAC",
				"shortTitle": "Frontier development"
			}
		]
	}
]
/** END TEST CASES **/