#! /bin/sh
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2019-12-06 12:49:48 +0200 (Fri, 06 Dec 2019) $
#$Revision: 7550 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.10/scripts/molcif2sdf $
#------------------------------------------------------------------------------
#*
#* Convert CIFs with multiple molecules (similar to those obtained from
#* the 'cif_molecule' script) to an SDF file with a molecule description.
#* Intended to be suitable for upload to PubChem.
#*
#* USAGE:
#*    $0 --options < file1.cif
#*    $0 --options file1.cif
#*    $0 --options file1.cif file2*.cif
#**

# Maintainer note: the comment lines that start with a hash immediately
# followed by an asterisk are read back from this file by awk to build the
# --help and --options output. Ordinary comments must never contain that
# two-character marker, or they will leak into the help text.

# Read TMPDIR before 'set -u' is enabled so that an unset TMPDIR is not fatal.
TMP_DIR="${TMPDIR:-}"

set -ue
## set -x

# Print the arguments as a '#' comment line, then copy stdin to stdout.
script() { echo "# $*"; cat; }

# setvar NAME = VALUE
# Assign VALUE to the variable NAME. The value is expanded only when the
# assignment itself is evaluated, so quotes inside VALUE cannot break out.
setvar() { eval "$1=\$3"; }

setvar Id = '$Id: molcif2sdf 7550 2019-12-06 10:49:48Z antanas $'

setvar BASENAME = "$(basename "$0")"
setvar COD_CIF = ""

# need_value OPTION WORDS_LEFT
# Abort with a diagnostic unless at least two unexamined command-line words
# (the option itself and its value) remain.
need_value() {
    if [ "$2" -lt 2 ]
    then
        echo "${BASENAME}:: ERROR, option '$1' requires an argument." >&2
        exit 1
    fi
}

#* OPTIONS:
#*   -C, --cod-cif 1000000.cif
#*                     Provide the original COD CIF to extract structure metadata.
#*
#*   --tmp-dir /tmp
#*                     Use the specified temporary directory (default is /tmp).
#*
#*   --help, --usage
#*                     Output a short help message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**

# ARGS_LEFT counts the original command-line words not yet examined. File
# operands are appended to the end of "$@" as they are met, so once the
# counter reaches zero "$@" holds exactly the file operands, in order, and
# no 'eval' on user-supplied names is required.
ARGS_LEFT=$#
while [ "${ARGS_LEFT}" -gt 0 ]
do
    case "$1" in
        -C|--cod-cif|--cod-ci|--cod-c|--cod|--co|--c)
            need_value "$1" "${ARGS_LEFT}"
            COD_CIF="$2"
            shift
            ARGS_LEFT=$((ARGS_LEFT - 1))
            ;;
        --tmp-dir|--tmp-di|--tmp-d|--tmp|--tm|--t)
            need_value "$1" "${ARGS_LEFT}"
            TMP_DIR="$2"
            shift
            ARGS_LEFT=$((ARGS_LEFT - 1))
            ;;
        --options|--option|--optio|--opti|--opt|--op|--o)
            echo "${BASENAME}:: The '--options' option is a placeholder."
            echo "${BASENAME}:: It should be replaced by one of the following options:"
            # Print only the OPTIONS section of the embedded help text.
            awk -v script="$0" '/#\* OPTIONS:/,/#\*\*/ {
                    sub("OPTIONS:", "");
                    sub("^ *#[*]?[*]?", "");
                    gsub("\\$0", script);
                    print $0
                }' "$0"
            exit
            ;;
        --help|--hel|--he|--h|--usage)
            # Print every section of the embedded help text.
            awk -v script="$0" '/#\*/,/#\*\*/ {
                    sub("^ *#[*]?[*]?", "");
                    gsub("\\$0", script);
                    print $0
                }' "$0"
            exit
            ;;
        --version)
            "$(dirname "$0")/cod-tools-version"
            exit
            ;;
        -*)
            echo "${BASENAME}:: ERROR, unknown option '$1'." >&2
            exit 1
            ;;
        *)
            # File operand: rotate it to the end of the positional parameters.
            set -- "$@" "$1"
            ;;
    esac
    shift
    ARGS_LEFT=$((ARGS_LEFT - 1))
done

TMP_DIR="${TMP_DIR:-/tmp}/tmp-${BASENAME}-$$"
mkdir "${TMP_DIR}"

# Install the cleanup handler only after mkdir has succeeded, so that a
# pre-existing directory of the same name is never deleted. The signal trap
# turns HUP/INT/TERM into a normal exit so that the EXIT handler still runs.
trap 'rm -rf -- "${TMP_DIR:?}"' EXIT
trap 'exit 1' HUP INT TERM

TMP_SPLIT_DIR="${TMP_DIR}/split"
mkdir "${TMP_SPLIT_DIR}"

# Split the input (the named files, or stdin when none are given) into
# files under the split directory.
cat ${1+"$@"} \
    | cif_split_primitive --quiet -o "${TMP_SPLIT_DIR}"

## ls -l "${TMP_SPLIT_DIR}"

# cif_tag_values TAG [cif_values-options...] FILE
# Run cif_values with the fixed option set used throughout this script,
# followed by the given tag, extra options and file.
cif_tag_values() {
    cif_values --no-header --no-dataname --no-filename \
        --dont-replace-spaces --tags "$@"
}

## set -x

# Arithmetic expansion strips the padding some 'wc' implementations emit.
NMOLS=$(( $(find "${TMP_SPLIT_DIR}/" -name '*.cif' | wc -l) ))

# NOTE(review): the SDF data headers in the loop below are emitted as a bare
# '> ', whereas the filter applied after the loop looks for headers of the
# form '> <...>'. The field names may have been lost from these literals;
# confirm against the upstream script before relying on the output.

N=0
for MOL in "${TMP_SPLIT_DIR}"/*
do
    # An unmatched glob stays literal; skip it instead of feeding it to obabel.
    [ -e "${MOL}" ] || continue

    # Molecule block; the record terminator is re-added after the data items.
    (
        cd "${TMP_SPLIT_DIR}"
        obabel ---errorlevel 1 -iCIF "$(basename "${MOL}")" -o SDF \
            | grep -v '^\$\$\$\$'
    )

    # Metadata comes from the COD CIF when one was supplied with -C,
    # otherwise from the molecule file itself.
    if [ -n "${COD_CIF}" ]
    then
        CIF="${COD_CIF}"
    else
        CIF="${MOL}"
    fi

    DATABASE_ID=$(cif_tag_values _cod_database_code "${CIF}")
    if [ "${DATABASE_ID}" = "?" ]
    then
        DATABASE_ID=$(cif_tag_values _cod_data_source_block "${MOL}")
    fi

    # Identifier; suffixed with the molecule index when there are several.
    echo '> '
    if [ "${NMOLS}" -gt 1 ]
    then
        printf '%s\n' "${DATABASE_ID}_${N}"
    else
        printf '%s\n' "${DATABASE_ID}"
    fi
    echo ""

    # Chemical names; lines starting with '?' are dropped.
    echo '> '
    (
        cif_tag_values _chemical_name_systematic "${CIF}"
        cif_tag_values _chemical_name_common "${CIF}"
    ) \
    | ( grep -v '^\?' || true ) \
    | perl -0777 -pe 's/[ \t]+/ /g; s/^\s*|\s*$//g'
    echo ""
    echo ""

    # Bibliographic reference, joined into a single comma-separated line.
    # The substitutions quoted so that '?' placeholders are never treated
    # as pathname patterns by the shell.
    echo '> '
    (
        cif_tag_values _publ_author_name --value-separator "; " "${CIF}"
        printf '%s\n' "($(cif_tag_values _journal_year "${CIF}"))"
        cif_tag_values _publ_section_title "${CIF}"
        cif_tag_values _journal_name_full "${CIF}"
        cif_tag_values _journal_volume "${CIF}"
        cif_tag_values _journal_issue "${CIF}"
        printf '%s\n' "$(cif_tag_values _journal_page_first "${CIF}")-$(cif_tag_values _journal_page_last "${CIF}")"
    ) \
    | perl -pe 's/-\?//; s/\(\?\)/?/; s/\?-/?/; s/\?+/?/g' \
    | ( grep -v '^\?' || true ) \
    | perl -0777 -pe 's/\n(.)/, $1/g; s/[ \t]+/ /g'
    echo ""

    echo '> '
    echo 'https://www.crystallography.net/'
    echo ""

    echo '> '
    echo "https://www.crystallography.net/cod/${DATABASE_ID}.html"
    echo ""

    echo '$$$$'
    N=$((N + 1))
done \
| perl -0777 -pe 's/^> <.*?>\n\s*\n//mg' \
| cif-to-utf8 \
| perl -CS -MUnicode::Normalize -pe \
    "# from http://ahinea.com/en/tech/accented-translate.html:
     # 2011.12.10
     \$_ = NFD(\$_); s/\\pM//g;"

# The temporary directory is removed by the EXIT trap installed above.