#!/bin/sh

# Filter turning one-line-per-record "rs..." entries (fields separated by
# " | ") into a tab-separated table of the "extra" (non-"snp") variations
# that have exactly one reported location on chromosome $1 of GRCh37.p13.
#
# Usage:  <this script> CHR < records > table
#   CHR     chromosome name exactly as it appears after "chr=" in the input.
#           It is pasted verbatim into the regular expressions below, so it
#           must not contain regex metacharacters or "/".
#   stdin   the records, one per line.
#   stdout  one line per kept record with 8 tab-separated columns:
#           rs id, 4th field of the rs line (presumably the variation
#           class), alleles, chr-pos, ctg-start, ctg-end, loctype, orient.
#
# Portability: only POSIX-style tool options are used ("sed -E" instead of the
# GNU-only "sed -r", and a real TAB character instead of the GNU-only "\t"
# escape in the sed replacement text).

# Exit immediately if a simple command exits with a non-zero status.
# For a pipeline only the status of its last stage counts, so a grep stage
# that selects nothing does not abort the script.
set -e

chr=$1
tab=$(printf '\t')

# $rs_line must be the same as in the prechecking.sh script:
# (extended regex, used with "grep -E": "\|" is a literal pipe character)
rs_line_fields2to7="\| human \| 9606 \| [^|]* \| [^|]* \| [^|]* \| [^|]*"
rs_line="^rs[^|]* $rs_line_fields2to7"
# Basic regex, used with plain grep: here an unescaped "|" is literal.
CTG_assembly_chr_pos="| CTG | assembly=GRCh37\.p13 | chr=$chr | chr-pos=[0-9][0-9]* |"

# We keep "extra" SNPs only (i.e. NOT tagged "snp"). Extra SNPs include
# molecular variations of class "in-del", "heterozygous", "microsatellite",
# "named-locus", "no-variation", "mixed" and "multinucleotide-polymorphism".
grep -E "$rs_line \|" |
  grep -v '| snp |' |
  grep '| notwithdrawn |' |
  # Keeping SNPs with at least 1 reported location on GRCh37.p13
  grep "$CTG_assembly_chr_pos" |
  # but filtering out those with more than 1 reported location
  # (i.e. the location pattern for chromosome $chr occurs twice in the line)
  grep -v "$CTG_assembly_chr_pos.*$CTG_assembly_chr_pos" |
  # 1st pass: reduce the line to "<rs id> <4th field> <everything after SNP | >"
  sed -E "s/^(rs[^|]*) \| [^|]* \| [^|]* \| ([^|]*) (\||\|.*\|) SNP \| (.*)$/\1 \2 \4/" |
  # 2nd pass: pick the alleles and the CTG location details and emit them
  # tab-separated. Group 4 (the filler between se(het) and CTG) is dropped.
  # A line the expression does not match is passed through unchanged by sed.
  sed -E "s/^(rs[^|]*) ([^|]*) alleles='([^|]*)' \| het=[^|]* \| se\(het\)=[^|]* (\||\|.*\|) CTG \| assembly=GRCh37\.p13 \| chr=$chr \| chr-pos=([0-9][0-9]*) \| [^|]* \| ctg-start=([0-9][0-9]*) \| ctg-end=([0-9][0-9]*) \| loctype=([^|]*) \| orient=([^| ]*)(\|)?.*$/\1${tab}\2${tab}\3${tab}\5${tab}\6${tab}\7${tab}\8${tab}\9/"

