#!/sw/bin/gawk -f
# script to join newick files
# Cam Webb and ...
#
# Run in a directory holding the backbone tree file EUPHYLLOPHYTE plus one
# file per subtree.  Every occurrence of a subtree file's name in the
# backbone is replaced by that file's contents (with ";" removed):
# first the order-level files (names ending in ALES, or starting with UNPL_
# and not ending in b, o or ~), then the family-level files (names ending
# in ACEAE).  The joined tree is printed on stdout followed by ";".
# Diagnostics are written to /dev/stderr.

# Return the contents of `path` with every ";" removed.
# Returns "" when the file is missing, unreadable or empty, so a caller
# never sees data left over from a previously read file.
# With RS set to "\x04" a file is normally a single record; if it holds
# several, the last one wins.
function read_tree(path,    text)
{
    text = "";
    while ((getline < path) > 0) {
        text = gensub(/;/, "", "G", $0);
    }
    close(path);    # release the descriptor; harmless if the open failed
    return text;
}

# Replace every occurrence of the literal string `marker` in `text` with
# the literal string `repl`.  gsub() is deliberately not used here: it would
# treat `marker` as a regular expression and give "&" and "\" in `repl` a
# special meaning.  Inserted text is not rescanned.
function replace_literal(text, marker, repl,    out, pos, mlen)
{
    mlen = length(marker);
    if (mlen == 0) return text;
    out = "";
    while ((pos = index(text, marker)) > 0) {
        out = out substr(text, 1, pos - 1) repl;
        text = substr(text, pos + mlen);
    }
    return out text;
}

# Graft the subtree stored in file[i] into the global tree `all`.
# Sets matched[i] on success; on an empty or unreadable file it reports the
# problem, sets failed[i] and leaves `all` untouched.
function graft(i,    subtree)
{
    subtree = read_tree(file[i]);
    if (subtree == "") {
        print "file empty or unreadable: " file[i] > "/dev/stderr";
        failed[i] = 1;
        return;
    }
    all = replace_literal(all, file[i], subtree);
    matched[i] = 1;
}

BEGIN {
    RS = "\x04";    # never expected in the input, so one getline reads it all

    # read files:
    if (("ls" | getline tmp) <= 0) {
        print "cannot list current directory" > "/dev/stderr";
        exit 1;
    }
    close("ls");
    # the listing ends in "\n", so split() yields a trailing empty name;
    # the loops below skip empty names explicitly
    nfiles = split(tmp, file, "\n");

    # get BACKBONE
    all = read_tree("EUPHYLLOPHYTE");
    if (all == "") {
        print "backbone file missing or empty: EUPHYLLOPHYTE" > "/dev/stderr";
        exit 1;
    }

    # substitute orders
    for (i = 1; i <= nfiles; i++) {
        if (file[i] == "") continue;
        if ((file[i] ~ /.*ALES$/) || (file[i] ~ /^UNPL_.*[^bo~]$/)) {
            graft(i);
        }
        if (file[i] == "EUPHYLLOPHYTE") { matched[i] = 1 }
    }

    # substitute families
    for (i = 1; i <= nfiles; i++) {
        if (file[i] == "") continue;
        if (file[i] ~ /.*ACEAE$/) {
            graft(i);
        }
    }

    # clean: strip all newlines, spaces and tabs from the joined tree
    gsub(/[ \n\t]/, "", all);

    # check for unmatched files (names containing "cbib" are exempt;
    # files already reported as empty/unreadable are not reported twice)
    for (i = 1; i <= nfiles; i++) {
        if (file[i] == "") continue;
        if ((matched[i] != 1) && (failed[i] != 1) && (file[i] !~ /cbib/)) {
            print "file not matched: " file[i] > "/dev/stderr";
        }
    }

    # check for remaining MARKERS: any run of two or more capital letters
    # still present in the tree is reported as an unresolved marker
    rest = all;
    while (match(rest, /[A-Z][A-Z]+/)) {
        print "MARKER tag unmatched: " substr(rest, RSTART, RLENGTH) > "/dev/stderr";
        rest = substr(rest, RSTART + RLENGTH);
    }

    print all ";";
    exit;
}